--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Jonathan Lemon <jonathan.lemon@gmail.com>
+Date: Tue, 10 Dec 2019 08:39:46 -0800
+Subject: bnxt: apply computed clamp value for coalesce parameter
+
+From: Jonathan Lemon <jonathan.lemon@gmail.com>
+
+[ Upstream commit 6adc4601c2a1ac87b4ab8ed0cb55db6efd0264e8 ]
+
+After executing "ethtool -C eth0 rx-usecs-irq 0", the box becomes
+unresponsive, likely due to interrupt livelock. It appears that
+a minimum clamp value for the irq timer is computed, but is never
+applied.
+
+Fix by applying the corrected clamp value.
+
+Fixes: 74706afa712d ("bnxt_en: Update interrupt coalescing logic.")
+Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -6178,7 +6178,7 @@ static void bnxt_hwrm_set_coal_params(st
+ tmr = bnxt_usec_to_coal_tmr(bp, hw_coal->coal_ticks_irq);
+ val = clamp_t(u16, tmr, 1,
+ coal_cap->cmpl_aggr_dma_tmr_during_int_max);
+- req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(tmr);
++ req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(val);
+ req->enables |=
+ cpu_to_le16(BNXT_COAL_CMPL_AGGR_TMR_DURING_INT_ENABLE);
+ }
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:13 -0500
+Subject: bnxt_en: Add missing devlink health reporters for VFs.
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit 7e334fc8003c7a38372cc98e7be6082670a47d29 ]
+
+The VF driver also needs to create the health reporters since
+VFs are also involved in firmware reset and recovery. Modify
+bnxt_dl_register() and bnxt_dl_unregister() so that they can
+be called by the VFs to register/unregister devlink. Only the PF
+will register the devlink parameters. With devlink registered,
+we can now create the health reporters on the VFs.
+
+Fixes: 6763c779c2d8 ("bnxt_en: Add new FW devlink_health_reporter")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++--------
+ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 23 ++++++++++++++++------
+ 2 files changed, 22 insertions(+), 14 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -11343,12 +11343,11 @@ static void bnxt_remove_one(struct pci_d
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct bnxt *bp = netdev_priv(dev);
+
+- if (BNXT_PF(bp)) {
++ if (BNXT_PF(bp))
+ bnxt_sriov_disable(bp);
+- bnxt_dl_fw_reporters_destroy(bp, true);
+- bnxt_dl_unregister(bp);
+- }
+
++ bnxt_dl_fw_reporters_destroy(bp, true);
++ bnxt_dl_unregister(bp);
+ pci_disable_pcie_error_reporting(pdev);
+ unregister_netdev(dev);
+ bnxt_shutdown_tc(bp);
+@@ -11844,10 +11843,8 @@ static int bnxt_init_one(struct pci_dev
+ if (rc)
+ goto init_err_cleanup_tc;
+
+- if (BNXT_PF(bp)) {
+- bnxt_dl_register(bp);
+- bnxt_dl_fw_reporters_create(bp);
+- }
++ bnxt_dl_register(bp);
++ bnxt_dl_fw_reporters_create(bp);
+
+ netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
+ board_info[ent->driver_data].name,
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+@@ -226,6 +226,8 @@ static const struct devlink_ops bnxt_dl_
+ #endif /* CONFIG_BNXT_SRIOV */
+ };
+
++static const struct devlink_ops bnxt_vf_dl_ops;
++
+ enum bnxt_dl_param_id {
+ BNXT_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK,
+@@ -439,7 +441,10 @@ int bnxt_dl_register(struct bnxt *bp)
+ return -ENOTSUPP;
+ }
+
+- dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
++ if (BNXT_PF(bp))
++ dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
++ else
++ dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
+ if (!dl) {
+ netdev_warn(bp->dev, "devlink_alloc failed");
+ return -ENOMEM;
+@@ -458,6 +463,9 @@ int bnxt_dl_register(struct bnxt *bp)
+ goto err_dl_free;
+ }
+
++ if (!BNXT_PF(bp))
++ return 0;
++
+ rc = devlink_params_register(dl, bnxt_dl_params,
+ ARRAY_SIZE(bnxt_dl_params));
+ if (rc) {
+@@ -507,11 +515,14 @@ void bnxt_dl_unregister(struct bnxt *bp)
+ if (!dl)
+ return;
+
+- devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params,
+- ARRAY_SIZE(bnxt_dl_port_params));
+- devlink_port_unregister(&bp->dl_port);
+- devlink_params_unregister(dl, bnxt_dl_params,
+- ARRAY_SIZE(bnxt_dl_params));
++ if (BNXT_PF(bp)) {
++ devlink_port_params_unregister(&bp->dl_port,
++ bnxt_dl_port_params,
++ ARRAY_SIZE(bnxt_dl_port_params));
++ devlink_port_unregister(&bp->dl_port);
++ devlink_params_unregister(dl, bnxt_dl_params,
++ ARRAY_SIZE(bnxt_dl_params));
++ }
+ devlink_unregister(dl);
+ devlink_free(dl);
+ }
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:10 -0500
+Subject: bnxt_en: Fix bp->fw_health allocation and free logic.
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit 8280b38e01f71e0f89389ccad3fa43b79e57c604 ]
+
+bp->fw_health needs to be allocated for either the firmware initiated
+reset feature or the driver initiated error recovery feature. The
+current code is not allocating bp->fw_health for all the necessary cases.
+This patch corrects the logic to allocate bp->fw_health correctly when
+needed. If allocation fails, we clear the feature flags.
+
+We also add the missing kfree(bp->fw_health) when the driver is
+unloaded. If we get an async reset message from the firmware, we also
+need to make sure that we have a valid bp->fw_health before proceeding.
+
+Fixes: 07f83d72d238 ("bnxt_en: Discover firmware error recovery capabilities.")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 36 +++++++++++++++++++++---------
+ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1
+ 2 files changed, 27 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -1995,6 +1995,9 @@ static int bnxt_async_event_process(stru
+ case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: {
+ u32 data1 = le32_to_cpu(cmpl->event_data1);
+
++ if (!bp->fw_health)
++ goto async_event_process_exit;
++
+ bp->fw_reset_timestamp = jiffies;
+ bp->fw_reset_min_dsecs = cmpl->timestamp_lo;
+ if (!bp->fw_reset_min_dsecs)
+@@ -4438,8 +4441,9 @@ static int bnxt_hwrm_func_drv_rgtr(struc
+ FUNC_DRV_RGTR_REQ_ENABLES_VER);
+
+ req.os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
+- flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE |
+- FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
++ flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE;
++ if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET)
++ flags |= FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
+ if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
+ flags |= FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT;
+ req.flags = cpu_to_le32(flags);
+@@ -7096,14 +7100,6 @@ static int bnxt_hwrm_error_recovery_qcfg
+ rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (rc)
+ goto err_recovery_out;
+- if (!fw_health) {
+- fw_health = kzalloc(sizeof(*fw_health), GFP_KERNEL);
+- bp->fw_health = fw_health;
+- if (!fw_health) {
+- rc = -ENOMEM;
+- goto err_recovery_out;
+- }
+- }
+ fw_health->flags = le32_to_cpu(resp->flags);
+ if ((fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) &&
+ !(bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL)) {
+@@ -10419,6 +10415,23 @@ static void bnxt_init_dflt_coal(struct b
+ bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
+ }
+
++static void bnxt_alloc_fw_health(struct bnxt *bp)
++{
++ if (bp->fw_health)
++ return;
++
++ if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) &&
++ !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
++ return;
++
++ bp->fw_health = kzalloc(sizeof(*bp->fw_health), GFP_KERNEL);
++ if (!bp->fw_health) {
++ netdev_warn(bp->dev, "Failed to allocate fw_health\n");
++ bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
++ bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
++ }
++}
++
+ static int bnxt_fw_init_one_p1(struct bnxt *bp)
+ {
+ int rc;
+@@ -10465,6 +10478,7 @@ static int bnxt_fw_init_one_p2(struct bn
+ netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
+ rc);
+
++ bnxt_alloc_fw_health(bp);
+ rc = bnxt_hwrm_error_recovery_qcfg(bp);
+ if (rc)
+ netdev_warn(bp->dev, "hwrm query error recovery failure rc: %d\n",
+@@ -11344,6 +11358,8 @@ static void bnxt_remove_one(struct pci_d
+ bnxt_dcb_free(bp);
+ kfree(bp->edev);
+ bp->edev = NULL;
++ kfree(bp->fw_health);
++ bp->fw_health = NULL;
+ bnxt_cleanup_pci(bp);
+ bnxt_free_ctx_mem(bp);
+ kfree(bp->ctx);
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -1658,6 +1658,7 @@ struct bnxt {
+ #define BNXT_FW_CAP_PCIE_STATS_SUPPORTED 0x00020000
+ #define BNXT_FW_CAP_EXT_STATS_SUPPORTED 0x00040000
+ #define BNXT_FW_CAP_ERR_RECOVER_RELOAD 0x00100000
++ #define BNXT_FW_CAP_HOT_RESET 0x00200000
+
+ #define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
+ u32 hwrm_spec_code;
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Michael Chan <michael.chan@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:07 -0500
+Subject: bnxt_en: Fix MSIX request logic for RDMA driver.
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 0c722ec0a289c7f6b53f89bad1cfb7c4db3f7a62 ]
+
+The logic needs to check both bp->total_irqs and the reserved IRQs in
+hw_resc->resv_irqs if applicable and see if both are enough to cover
+the L2 and RDMA requested vectors. The current code is only checking
+bp->total_irqs and can fail in some code paths, such as the TX timeout
+code path with the RDMA driver requesting vectors after recovery. In
+this code path, we have not reserved enough MSIX resources for the
+RDMA driver yet.
+
+Fixes: 75720e6323a1 ("bnxt_en: Keep track of reserved IRQs.")
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+@@ -113,8 +113,10 @@ static int bnxt_req_msix_vecs(struct bnx
+ {
+ struct net_device *dev = edev->net;
+ struct bnxt *bp = netdev_priv(dev);
++ struct bnxt_hw_resc *hw_resc;
+ int max_idx, max_cp_rings;
+ int avail_msix, idx;
++ int total_vecs;
+ int rc = 0;
+
+ ASSERT_RTNL();
+@@ -142,7 +144,10 @@ static int bnxt_req_msix_vecs(struct bnx
+ }
+ edev->ulp_tbl[ulp_id].msix_base = idx;
+ edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
+- if (bp->total_irqs < (idx + avail_msix)) {
++ hw_resc = &bp->hw_resc;
++ total_vecs = idx + avail_msix;
++ if (bp->total_irqs < total_vecs ||
++ (BNXT_NEW_RM(bp) && hw_resc->resv_irqs < total_vecs)) {
+ if (netif_running(dev)) {
+ bnxt_close_nic(bp, true, false);
+ rc = bnxt_open_nic(bp, true, false);
+@@ -156,7 +161,6 @@ static int bnxt_req_msix_vecs(struct bnx
+ }
+
+ if (BNXT_NEW_RM(bp)) {
+- struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+ int resv_msix;
+
+ resv_msix = hw_resc->resv_irqs - bp->cp_nr_rings;
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:12 -0500
+Subject: bnxt_en: Fix the logic that creates the health reporters.
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit 937f188c1f4f89b3fa93ba31fc8587dc1fb14a22 ]
+
+Fix the logic to properly check the fw capabilities and create the
+devlink health reporters only when needed. The current code creates
+the reporters unconditionally as long as bp->fw_health is valid, and
+that's not correct.
+
+Call bnxt_dl_fw_reporters_create() directly from the init and reset
+code path instead of from bnxt_dl_register(). This allows the
+reporters to be adjusted when capabilities change. The same
+applies to bnxt_dl_fw_reporters_destroy().
+
+Fixes: 6763c779c2d8 ("bnxt_en: Add new FW devlink_health_reporter")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 11 +++
+ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 64 +++++++++++++++-------
+ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h | 2
+ 3 files changed, 56 insertions(+), 21 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -10563,6 +10563,12 @@ static int bnxt_fw_init_one(struct bnxt
+ rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
+ if (rc)
+ return rc;
++
++ /* In case fw capabilities have changed, destroy the unneeded
++ * reporters and create newly capable ones.
++ */
++ bnxt_dl_fw_reporters_destroy(bp, false);
++ bnxt_dl_fw_reporters_create(bp);
+ bnxt_fw_init_one_p3(bp);
+ return 0;
+ }
+@@ -11339,6 +11345,7 @@ static void bnxt_remove_one(struct pci_d
+
+ if (BNXT_PF(bp)) {
+ bnxt_sriov_disable(bp);
++ bnxt_dl_fw_reporters_destroy(bp, true);
+ bnxt_dl_unregister(bp);
+ }
+
+@@ -11837,8 +11844,10 @@ static int bnxt_init_one(struct pci_dev
+ if (rc)
+ goto init_err_cleanup_tc;
+
+- if (BNXT_PF(bp))
++ if (BNXT_PF(bp)) {
+ bnxt_dl_register(bp);
++ bnxt_dl_fw_reporters_create(bp);
++ }
+
+ netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
+ board_info[ent->driver_data].name,
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+@@ -102,21 +102,15 @@ struct devlink_health_reporter_ops bnxt_
+ .recover = bnxt_fw_fatal_recover,
+ };
+
+-static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
++void bnxt_dl_fw_reporters_create(struct bnxt *bp)
+ {
+ struct bnxt_fw_health *health = bp->fw_health;
+
+- if (!health)
++ if (!bp->dl || !health)
+ return;
+
+- health->fw_reporter =
+- devlink_health_reporter_create(bp->dl, &bnxt_dl_fw_reporter_ops,
+- 0, false, bp);
+- if (IS_ERR(health->fw_reporter)) {
+- netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
+- PTR_ERR(health->fw_reporter));
+- health->fw_reporter = NULL;
+- }
++ if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) || health->fw_reset_reporter)
++ goto err_recovery;
+
+ health->fw_reset_reporter =
+ devlink_health_reporter_create(bp->dl,
+@@ -126,8 +120,30 @@ static void bnxt_dl_fw_reporters_create(
+ netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
+ PTR_ERR(health->fw_reset_reporter));
+ health->fw_reset_reporter = NULL;
++ bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
+ }
+
++err_recovery:
++ if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
++ return;
++
++ if (!health->fw_reporter) {
++ health->fw_reporter =
++ devlink_health_reporter_create(bp->dl,
++ &bnxt_dl_fw_reporter_ops,
++ 0, false, bp);
++ if (IS_ERR(health->fw_reporter)) {
++ netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
++ PTR_ERR(health->fw_reporter));
++ health->fw_reporter = NULL;
++ bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
++ return;
++ }
++ }
++
++ if (health->fw_fatal_reporter)
++ return;
++
+ health->fw_fatal_reporter =
+ devlink_health_reporter_create(bp->dl,
+ &bnxt_dl_fw_fatal_reporter_ops,
+@@ -136,24 +152,35 @@ static void bnxt_dl_fw_reporters_create(
+ netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
+ PTR_ERR(health->fw_fatal_reporter));
+ health->fw_fatal_reporter = NULL;
++ bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
+ }
+ }
+
+-static void bnxt_dl_fw_reporters_destroy(struct bnxt *bp)
++void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all)
+ {
+ struct bnxt_fw_health *health = bp->fw_health;
+
+- if (!health)
++ if (!bp->dl || !health)
+ return;
+
+- if (health->fw_reporter)
+- devlink_health_reporter_destroy(health->fw_reporter);
+-
+- if (health->fw_reset_reporter)
++ if ((all || !(bp->fw_cap & BNXT_FW_CAP_HOT_RESET)) &&
++ health->fw_reset_reporter) {
+ devlink_health_reporter_destroy(health->fw_reset_reporter);
++ health->fw_reset_reporter = NULL;
++ }
+
+- if (health->fw_fatal_reporter)
++ if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && !all)
++ return;
++
++ if (health->fw_reporter) {
++ devlink_health_reporter_destroy(health->fw_reporter);
++ health->fw_reporter = NULL;
++ }
++
++ if (health->fw_fatal_reporter) {
+ devlink_health_reporter_destroy(health->fw_fatal_reporter);
++ health->fw_fatal_reporter = NULL;
++ }
+ }
+
+ void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
+@@ -458,8 +485,6 @@ int bnxt_dl_register(struct bnxt *bp)
+
+ devlink_params_publish(dl);
+
+- bnxt_dl_fw_reporters_create(bp);
+-
+ return 0;
+
+ err_dl_port_unreg:
+@@ -482,7 +507,6 @@ void bnxt_dl_unregister(struct bnxt *bp)
+ if (!dl)
+ return;
+
+- bnxt_dl_fw_reporters_destroy(bp);
+ devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params,
+ ARRAY_SIZE(bnxt_dl_port_params));
+ devlink_port_unregister(&bp->dl_port);
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
+@@ -57,6 +57,8 @@ struct bnxt_dl_nvm_param {
+ };
+
+ void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event);
++void bnxt_dl_fw_reporters_create(struct bnxt *bp);
++void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all);
+ int bnxt_dl_register(struct bnxt *bp);
+ void bnxt_dl_unregister(struct bnxt *bp);
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Michael Chan <michael.chan@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:08 -0500
+Subject: bnxt_en: Free context memory in the open path if firmware has been reset.
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 325f85f37e5b35807d86185bdf2c64d2980c44ba ]
+
+This will trigger new context memory to be rediscovered and allocated
+during the re-probe process after a firmware reset. Without this, the
+newly reset firmware does not have valid context memory and the driver
+will eventually fail to allocate some resources.
+
+Fixes: ec5d31e3c15d ("bnxt_en: Handle firmware reset status during IF_UP.")
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -8766,6 +8766,9 @@ static int bnxt_hwrm_if_change(struct bn
+ }
+ if (resc_reinit || fw_reset) {
+ if (fw_reset) {
++ bnxt_free_ctx_mem(bp);
++ kfree(bp->ctx);
++ bp->ctx = NULL;
+ rc = bnxt_fw_init_one(bp);
+ if (rc) {
+ set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:11 -0500
+Subject: bnxt_en: Remove unnecessary NULL checks for fw_health
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit 0797c10d2d1fa0d6f14612404781b348fc757c3e ]
+
+After fixing the allocation of bp->fw_health in the previous patch,
+the driver will not go through the fw reset and recovery code paths
+if bp->fw_health allocation fails. So we can now remove the
+unnecessary NULL checks.
+
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++----
+ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 6 +-----
+ 2 files changed, 3 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -9953,8 +9953,7 @@ static void bnxt_fw_health_check(struct
+ struct bnxt_fw_health *fw_health = bp->fw_health;
+ u32 val;
+
+- if (!fw_health || !fw_health->enabled ||
+- test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
++ if (!fw_health->enabled || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+ return;
+
+ if (fw_health->tmr_counter) {
+@@ -10697,8 +10696,7 @@ static void bnxt_fw_reset_task(struct wo
+ bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
+ return;
+ case BNXT_FW_RESET_STATE_ENABLE_DEV:
+- if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) &&
+- bp->fw_health) {
++ if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
+ u32 val;
+
+ val = bnxt_fw_health_readl(bp,
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+@@ -19,11 +19,10 @@ static int bnxt_fw_reporter_diagnose(str
+ struct devlink_fmsg *fmsg)
+ {
+ struct bnxt *bp = devlink_health_reporter_priv(reporter);
+- struct bnxt_fw_health *health = bp->fw_health;
+ u32 val, health_status;
+ int rc;
+
+- if (!health || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
++ if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+ return 0;
+
+ val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+@@ -162,9 +161,6 @@ void bnxt_devlink_health_report(struct b
+ struct bnxt_fw_health *fw_health = bp->fw_health;
+ struct bnxt_fw_reporter_ctx fw_reporter_ctx;
+
+- if (!fw_health)
+- return;
+-
+ fw_reporter_ctx.sp_event = event;
+ switch (event) {
+ case BNXT_FW_RESET_NOTIFY_SP_EVENT:
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Tue, 10 Dec 2019 02:49:09 -0500
+Subject: bnxt_en: Return error if FW returns more data than dump length
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit c74751f4c39232c31214ec6a3bc1c7e62f5c728b ]
+
+If any change happened in the configuration of VF in VM while
+collecting live dump, there could be a race and firmware can return
+more data than allocated dump length. Fix it by keeping track of
+the accumulated core dump length copied so far and abort the copy
+with error code if the next chunk of core dump will exceed the
+original dump length.
+
+Fixes: 6c5657d085ae ("bnxt_en: Add support for ethtool get dump.")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 38 +++++++++++++++++-----
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h | 4 ++
+ 2 files changed, 34 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+@@ -3064,8 +3064,15 @@ static int bnxt_hwrm_dbg_dma_data(struct
+ }
+ }
+
+- if (info->dest_buf)
+- memcpy(info->dest_buf + off, dma_buf, len);
++ if (info->dest_buf) {
++ if ((info->seg_start + off + len) <=
++ BNXT_COREDUMP_BUF_LEN(info->buf_len)) {
++ memcpy(info->dest_buf + off, dma_buf, len);
++ } else {
++ rc = -ENOBUFS;
++ break;
++ }
++ }
+
+ if (cmn_req->req_type ==
+ cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE))
+@@ -3119,7 +3126,7 @@ static int bnxt_hwrm_dbg_coredump_initia
+
+ static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
+ u16 segment_id, u32 *seg_len,
+- void *buf, u32 offset)
++ void *buf, u32 buf_len, u32 offset)
+ {
+ struct hwrm_dbg_coredump_retrieve_input req = {0};
+ struct bnxt_hwrm_dbg_dma_info info = {NULL};
+@@ -3134,8 +3141,11 @@ static int bnxt_hwrm_dbg_coredump_retrie
+ seq_no);
+ info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output,
+ data_len);
+- if (buf)
++ if (buf) {
+ info.dest_buf = buf + offset;
++ info.buf_len = buf_len;
++ info.seg_start = offset;
++ }
+
+ rc = bnxt_hwrm_dbg_dma_data(bp, &req, sizeof(req), &info);
+ if (!rc)
+@@ -3225,14 +3235,17 @@ bnxt_fill_coredump_record(struct bnxt *b
+ static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
+ {
+ u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output);
++ u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0;
+ struct coredump_segment_record *seg_record = NULL;
+- u32 offset = 0, seg_hdr_len, seg_record_len;
+ struct bnxt_coredump_segment_hdr seg_hdr;
+ struct bnxt_coredump coredump = {NULL};
+ time64_t start_time;
+ u16 start_utc;
+ int rc = 0, i;
+
++ if (buf)
++ buf_len = *dump_len;
++
+ start_time = ktime_get_real_seconds();
+ start_utc = sys_tz.tz_minuteswest * 60;
+ seg_hdr_len = sizeof(seg_hdr);
+@@ -3265,6 +3278,12 @@ static int bnxt_get_coredump(struct bnxt
+ u32 duration = 0, seg_len = 0;
+ unsigned long start, end;
+
++ if (buf && ((offset + seg_hdr_len) >
++ BNXT_COREDUMP_BUF_LEN(buf_len))) {
++ rc = -ENOBUFS;
++ goto err;
++ }
++
+ start = jiffies;
+
+ rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id);
+@@ -3277,9 +3296,11 @@ static int bnxt_get_coredump(struct bnxt
+
+ /* Write segment data into the buffer */
+ rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id,
+- &seg_len, buf,
++ &seg_len, buf, buf_len,
+ offset + seg_hdr_len);
+- if (rc)
++ if (rc && rc == -ENOBUFS)
++ goto err;
++ else if (rc)
+ netdev_err(bp->dev,
+ "Failed to retrieve coredump for seg = %d\n",
+ seg_record->segment_id);
+@@ -3309,7 +3330,8 @@ err:
+ rc);
+ kfree(coredump.data);
+ *dump_len += sizeof(struct bnxt_coredump_record);
+-
++ if (rc == -ENOBUFS)
++ netdev_err(bp->dev, "Firmware returned large coredump buffer");
+ return rc;
+ }
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+@@ -31,6 +31,8 @@ struct bnxt_coredump {
+ u16 total_segs;
+ };
+
++#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record))
++
+ struct bnxt_hwrm_dbg_dma_info {
+ void *dest_buf;
+ int dest_buf_size;
+@@ -38,6 +40,8 @@ struct bnxt_hwrm_dbg_dma_info {
+ u16 seq_off;
+ u16 data_len_off;
+ u16 segs;
++ u32 seg_start;
++ u32 buf_len;
+ };
+
+ struct hwrm_dbg_cmn_input {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Mahesh Bandewar <maheshb@google.com>
+Date: Fri, 6 Dec 2019 15:44:55 -0800
+Subject: bonding: fix active-backup transition after link failure
+
+From: Mahesh Bandewar <maheshb@google.com>
+
+[ Upstream commit 5d485ed88d48f8101a2067348e267c0aaf4ed486 ]
+
+After the recent fix in commit 1899bb325149 ("bonding: fix state
+transition issue in link monitoring"), the active-backup mode with
+miimon initially come-up fine but after a link-failure, both members
+transition into backup state.
+
+Following steps to reproduce the scenario (eth1 and eth2 are the
+slaves of the bond):
+
+ ip link set eth1 up
+ ip link set eth2 down
+ sleep 1
+ ip link set eth2 up
+ ip link set eth1 down
+ cat /sys/class/net/eth1/bonding_slave/state
+ cat /sys/class/net/eth2/bonding_slave/state
+
+Fixes: 1899bb325149 ("bonding: fix state transition issue in link monitoring")
+CC: Jay Vosburgh <jay.vosburgh@canonical.com>
+Signed-off-by: Mahesh Bandewar <maheshb@google.com>
+Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -2225,9 +2225,6 @@ static void bond_miimon_commit(struct bo
+ } else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
+ /* make it immediately active */
+ bond_set_active_slave(slave);
+- } else if (slave != primary) {
+- /* prevent it from being the active one */
+- bond_set_backup_slave(slave);
+ }
+
+ slave_info(bond->dev, slave->dev, "link status definitely up, %u Mbps %s duplex\n",
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Date: Mon, 30 Dec 2019 18:14:08 +0530
+Subject: cxgb4/cxgb4vf: fix flow control display for auto negotiation
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit 0caeaf6ad532f9be5a768a158627cb31921cc8b7 ]
+
+As per 802.3-2005, Section Two, Annex 28B, Table 28B-2 [1], when
+_only_ Rx pause is enabled, both symmetric and asymmetric pause
+towards local device must be enabled. Also, firmware returns the local
+device's flow control pause params as part of advertised capabilities
+and negotiated params as part of current link attributes. So, fix up
+ethtool's flow control pause params fetch logic to read from acaps,
+instead of linkattr.
+
+[1] https://standards.ieee.org/standard/802_3-2005.html
+
+Fixes: c3168cabe1af ("cxgb4/cxgbvf: Handle 32-bit fw port capabilities")
+Signed-off-by: Surendra Mobiya <surendra@chelsio.com>
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 4 +--
+ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 21 ++++++++++++--------
+ drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 4 +--
+ drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h | 1
+ drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 18 ++++++++++-------
+ 6 files changed, 30 insertions(+), 19 deletions(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+@@ -503,6 +503,7 @@ struct link_config {
+
+ enum cc_pause requested_fc; /* flow control user has requested */
+ enum cc_pause fc; /* actual link flow control */
++ enum cc_pause advertised_fc; /* actual advertised flow control */
+
+ enum cc_fec requested_fec; /* Forward Error Correction: */
+ enum cc_fec fec; /* requested and actual in use */
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+@@ -793,8 +793,8 @@ static void get_pauseparam(struct net_de
+ struct port_info *p = netdev_priv(dev);
+
+ epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
+- epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
+- epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
++ epause->rx_pause = (p->link_cfg.advertised_fc & PAUSE_RX) != 0;
++ epause->tx_pause = (p->link_cfg.advertised_fc & PAUSE_TX) != 0;
+ }
+
+ static int set_pauseparam(struct net_device *dev,
+--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+@@ -4089,7 +4089,8 @@ static inline fw_port_cap32_t cc_to_fwca
+ if (cc_pause & PAUSE_TX)
+ fw_pause |= FW_PORT_CAP32_802_3_PAUSE;
+ else
+- fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR;
++ fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR |
++ FW_PORT_CAP32_802_3_PAUSE;
+ } else if (cc_pause & PAUSE_TX) {
+ fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR;
+ }
+@@ -8563,17 +8564,17 @@ static fw_port_cap32_t lstatus_to_fwcap(
+ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
+ {
+ const struct fw_port_cmd *cmd = (const void *)rpl;
+- int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
+- struct adapter *adapter = pi->adapter;
++ fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
+ struct link_config *lc = &pi->link_cfg;
+- int link_ok, linkdnrc;
+- enum fw_port_type port_type;
++ struct adapter *adapter = pi->adapter;
++ unsigned int speed, fc, fec, adv_fc;
+ enum fw_port_module_type mod_type;
+- unsigned int speed, fc, fec;
+- fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
++ int action, link_ok, linkdnrc;
++ enum fw_port_type port_type;
+
+ /* Extract the various fields from the Port Information message.
+ */
++ action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
+ switch (action) {
+ case FW_PORT_ACTION_GET_PORT_INFO: {
+ u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
+@@ -8611,6 +8612,7 @@ void t4_handle_get_port_info(struct port
+ }
+
+ fec = fwcap_to_cc_fec(acaps);
++ adv_fc = fwcap_to_cc_pause(acaps);
+ fc = fwcap_to_cc_pause(linkattr);
+ speed = fwcap_to_speed(linkattr);
+
+@@ -8667,7 +8669,9 @@ void t4_handle_get_port_info(struct port
+ }
+
+ if (link_ok != lc->link_ok || speed != lc->speed ||
+- fc != lc->fc || fec != lc->fec) { /* something changed */
++ fc != lc->fc || adv_fc != lc->advertised_fc ||
++ fec != lc->fec) {
++ /* something changed */
+ if (!link_ok && lc->link_ok) {
+ lc->link_down_rc = linkdnrc;
+ dev_warn_ratelimited(adapter->pdev_dev,
+@@ -8677,6 +8681,7 @@ void t4_handle_get_port_info(struct port
+ }
+ lc->link_ok = link_ok;
+ lc->speed = speed;
++ lc->advertised_fc = adv_fc;
+ lc->fc = fc;
+ lc->fec = fec;
+
+--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
++++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+@@ -1690,8 +1690,8 @@ static void cxgb4vf_get_pauseparam(struc
+ struct port_info *pi = netdev_priv(dev);
+
+ pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
+- pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
+- pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
++ pauseparam->rx_pause = (pi->link_cfg.advertised_fc & PAUSE_RX) != 0;
++ pauseparam->tx_pause = (pi->link_cfg.advertised_fc & PAUSE_TX) != 0;
+ }
+
+ /*
+--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
++++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+@@ -135,6 +135,7 @@ struct link_config {
+
+ enum cc_pause requested_fc; /* flow control user has requested */
+ enum cc_pause fc; /* actual link flow control */
++ enum cc_pause advertised_fc; /* actual advertised flow control */
+
+ enum cc_fec auto_fec; /* Forward Error Correction: */
+ enum cc_fec requested_fec; /* "automatic" (IEEE 802.3), */
+--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+@@ -1913,16 +1913,16 @@ static const char *t4vf_link_down_rc_str
+ static void t4vf_handle_get_port_info(struct port_info *pi,
+ const struct fw_port_cmd *cmd)
+ {
+- int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
+- struct adapter *adapter = pi->adapter;
++ fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
+ struct link_config *lc = &pi->link_cfg;
+- int link_ok, linkdnrc;
+- enum fw_port_type port_type;
++ struct adapter *adapter = pi->adapter;
++ unsigned int speed, fc, fec, adv_fc;
+ enum fw_port_module_type mod_type;
+- unsigned int speed, fc, fec;
+- fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
++ int action, link_ok, linkdnrc;
++ enum fw_port_type port_type;
+
+ /* Extract the various fields from the Port Information message. */
++ action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
+ switch (action) {
+ case FW_PORT_ACTION_GET_PORT_INFO: {
+ u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
+@@ -1982,6 +1982,7 @@ static void t4vf_handle_get_port_info(st
+ }
+
+ fec = fwcap_to_cc_fec(acaps);
++ adv_fc = fwcap_to_cc_pause(acaps);
+ fc = fwcap_to_cc_pause(linkattr);
+ speed = fwcap_to_speed(linkattr);
+
+@@ -2012,7 +2013,9 @@ static void t4vf_handle_get_port_info(st
+ }
+
+ if (link_ok != lc->link_ok || speed != lc->speed ||
+- fc != lc->fc || fec != lc->fec) { /* something changed */
++ fc != lc->fc || adv_fc != lc->advertised_fc ||
++ fec != lc->fec) {
++ /* something changed */
+ if (!link_ok && lc->link_ok) {
+ lc->link_down_rc = linkdnrc;
+ dev_warn_ratelimited(adapter->pdev_dev,
+@@ -2022,6 +2025,7 @@ static void t4vf_handle_get_port_info(st
+ }
+ lc->link_ok = link_ok;
+ lc->speed = speed;
++ lc->advertised_fc = adv_fc;
+ lc->fc = fc;
+ lc->fec = fec;
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:48 +0000
+Subject: gtp: avoid zero size hashtable
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 6a902c0f31993ab02e1b6ea7085002b9c9083b6a ]
+
+GTP default hashtable size is 1024 and userspace could set specific
+hashtable size with IFLA_GTP_PDP_HASHSIZE. If hashtable size is set to 0
+from userspace, hashtable will not work and panic will occur.
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -660,10 +660,13 @@ static int gtp_newlink(struct net *src_n
+ if (err < 0)
+ return err;
+
+- if (!data[IFLA_GTP_PDP_HASHSIZE])
++ if (!data[IFLA_GTP_PDP_HASHSIZE]) {
+ hashsize = 1024;
+- else
++ } else {
+ hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]);
++ if (!hashsize)
++ hashsize = 1024;
++ }
+
+ err = gtp_hashtable_new(gtp, hashsize);
+ if (err < 0)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:00 +0000
+Subject: gtp: do not allow adding duplicate tid and ms_addr pdp context
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 6b01b1d9b2d38dc84ac398bfe9f00baff06a31e5 ]
+
+GTP RX packet path lookups pdp context with TID. If duplicate TID pdp
+contexts are existing in the list, it couldn't select correct pdp context.
+So, TID value should be unique.
+GTP TX packet path lookups pdp context with ms_addr. If duplicate ms_addr pdp
+contexts are existing in the list, it couldn't select correct pdp context.
+So, ms_addr value should be unique.
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 32 ++++++++++++++++++++++----------
+ 1 file changed, 22 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -928,24 +928,31 @@ static void ipv4_pdp_fill(struct pdp_ctx
+ }
+ }
+
+-static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
+- struct genl_info *info)
++static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
++ struct genl_info *info)
+ {
++ struct pdp_ctx *pctx, *pctx_tid = NULL;
+ struct net_device *dev = gtp->dev;
+ u32 hash_ms, hash_tid = 0;
+- struct pdp_ctx *pctx;
++ unsigned int version;
+ bool found = false;
+ __be32 ms_addr;
+
+ ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
+ hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size;
++ version = nla_get_u32(info->attrs[GTPA_VERSION]);
+
+- hlist_for_each_entry_rcu(pctx, >p->addr_hash[hash_ms], hlist_addr) {
+- if (pctx->ms_addr_ip4.s_addr == ms_addr) {
+- found = true;
+- break;
+- }
+- }
++ pctx = ipv4_pdp_find(gtp, ms_addr);
++ if (pctx)
++ found = true;
++ if (version == GTP_V0)
++ pctx_tid = gtp0_pdp_find(gtp,
++ nla_get_u64(info->attrs[GTPA_TID]));
++ else if (version == GTP_V1)
++ pctx_tid = gtp1_pdp_find(gtp,
++ nla_get_u32(info->attrs[GTPA_I_TEI]));
++ if (pctx_tid)
++ found = true;
+
+ if (found) {
+ if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
+@@ -953,6 +960,11 @@ static int ipv4_pdp_add(struct gtp_dev *
+ if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
++ if (pctx && pctx_tid)
++ return -EEXIST;
++ if (!pctx)
++ pctx = pctx_tid;
++
+ ipv4_pdp_fill(pctx, info);
+
+ if (pctx->gtp_version == GTP_V0)
+@@ -1076,7 +1088,7 @@ static int gtp_genl_new_pdp(struct sk_bu
+ goto out_unlock;
+ }
+
+- err = ipv4_pdp_add(gtp, sk, info);
++ err = gtp_pdp_add(gtp, sk, info);
+
+ out_unlock:
+ rcu_read_unlock();
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:11 +0800
+Subject: gtp: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 6e9105c73f8d2163d12d5dfd762fd75483ed30f5 ]
+
+When doing an IPv6 tunnel PMTU update, which calls __ip6_rt_update_pmtu() in the end,
+we should not call dst_confirm_neigh() as there is no two-way communication.
+
+Although GTP only support ipv4 right now, and __ip_rt_update_pmtu() does not
+call dst_confirm_neigh(), we still set it to false to keep consistency with
+IPv6 code.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -541,7 +541,7 @@ static int gtp_build_skb_ip4(struct sk_b
+ mtu = dst_mtu(&rt->dst);
+ }
+
+- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true);
++ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false);
+
+ if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
+ mtu < ntohs(iph->tot_len)) {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:34 +0000
+Subject: gtp: fix an use-after-free in ipv4_pdp_find()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 94dc550a5062030569d4aa76e10e50c8fc001930 ]
+
+ipv4_pdp_find() is called in TX packet path of GTP.
+ipv4_pdp_find() internally uses gtp->tid_hash to lookup pdp context.
+In the current code, gtp->tid_hash and gtp->addr_hash are freed by
+->dellink(), which is gtp_dellink().
+But gtp_dellink() would be called while packets are processing.
+So, gtp_dellink() should not free gtp->tid_hash and gtp->addr_hash.
+Instead, dev->priv_destructor() would be used because this callback
+is called after all packet processing safely.
+
+Test commands:
+ ip link add veth1 type veth peer name veth2
+ ip a a 172.0.0.1/24 dev veth1
+ ip link set veth1 up
+ ip a a 172.99.0.1/32 dev lo
+
+ gtp-link add gtp1 &
+
+ gtp-tunnel add gtp1 v1 200 100 172.99.0.2 172.0.0.2
+ ip r a 172.99.0.2/32 dev gtp1
+ ip link set gtp1 mtu 1500
+
+ ip netns add ns2
+ ip link set veth2 netns ns2
+ ip netns exec ns2 ip a a 172.0.0.2/24 dev veth2
+ ip netns exec ns2 ip link set veth2 up
+ ip netns exec ns2 ip a a 172.99.0.2/32 dev lo
+ ip netns exec ns2 ip link set lo up
+
+ ip netns exec ns2 gtp-link add gtp2 &
+ ip netns exec ns2 gtp-tunnel add gtp2 v1 100 200 172.99.0.1 172.0.0.1
+ ip netns exec ns2 ip r a 172.99.0.1/32 dev gtp2
+ ip netns exec ns2 ip link set gtp2 mtu 1500
+
+ hping3 172.99.0.2 -2 --flood &
+ ip link del gtp1
+
+Splat looks like:
+[ 72.568081][ T1195] BUG: KASAN: use-after-free in ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.568916][ T1195] Read of size 8 at addr ffff8880b9a35d28 by task hping3/1195
+[ 72.569631][ T1195]
+[ 72.569861][ T1195] CPU: 2 PID: 1195 Comm: hping3 Not tainted 5.5.0-rc1 #199
+[ 72.570547][ T1195] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[ 72.571438][ T1195] Call Trace:
+[ 72.571764][ T1195] dump_stack+0x96/0xdb
+[ 72.572171][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.572761][ T1195] print_address_description.constprop.5+0x1be/0x360
+[ 72.573400][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.573971][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.574544][ T1195] __kasan_report+0x12a/0x16f
+[ 72.575014][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.575593][ T1195] kasan_report+0xe/0x20
+[ 72.576004][ T1195] ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.576577][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp]
+[ ... ]
+[ 72.647671][ T1195] BUG: unable to handle page fault for address: ffff8880b9a35d28
+[ 72.648512][ T1195] #PF: supervisor read access in kernel mode
+[ 72.649158][ T1195] #PF: error_code(0x0000) - not-present page
+[ 72.649849][ T1195] PGD a6c01067 P4D a6c01067 PUD 11fb07067 PMD 11f939067 PTE 800fffff465ca060
+[ 72.652958][ T1195] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[ 72.653834][ T1195] CPU: 2 PID: 1195 Comm: hping3 Tainted: G B 5.5.0-rc1 #199
+[ 72.668062][ T1195] RIP: 0010:ipv4_pdp_find.isra.12+0x86/0x170 [gtp]
+[ ... ]
+[ 72.679168][ T1195] Call Trace:
+[ 72.679603][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp]
+[ 72.681915][ T1195] ? ipv4_pdp_find.isra.12+0x170/0x170 [gtp]
+[ 72.682513][ T1195] ? lock_acquire+0x164/0x3b0
+[ 72.682966][ T1195] ? gtp_dev_xmit+0x35e/0x890 [gtp]
+[ 72.683481][ T1195] gtp_dev_xmit+0x3c2/0x890 [gtp]
+[ ... ]
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -640,9 +640,16 @@ static void gtp_link_setup(struct net_de
+ }
+
+ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
+-static void gtp_hashtable_free(struct gtp_dev *gtp);
+ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
+
++static void gtp_destructor(struct net_device *dev)
++{
++ struct gtp_dev *gtp = netdev_priv(dev);
++
++ kfree(gtp->addr_hash);
++ kfree(gtp->tid_hash);
++}
++
+ static int gtp_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+@@ -680,13 +687,15 @@ static int gtp_newlink(struct net *src_n
+
+ gn = net_generic(dev_net(dev), gtp_net_id);
+ list_add_rcu(>p->list, &gn->gtp_dev_list);
++ dev->priv_destructor = gtp_destructor;
+
+ netdev_dbg(dev, "registered new GTP interface\n");
+
+ return 0;
+
+ out_hashtable:
+- gtp_hashtable_free(gtp);
++ kfree(gtp->addr_hash);
++ kfree(gtp->tid_hash);
+ out_encap:
+ gtp_encap_disable(gtp);
+ return err;
+@@ -695,8 +704,13 @@ out_encap:
+ static void gtp_dellink(struct net_device *dev, struct list_head *head)
+ {
+ struct gtp_dev *gtp = netdev_priv(dev);
++ struct pdp_ctx *pctx;
++ int i;
++
++ for (i = 0; i < gtp->hash_size; i++)
++ hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid)
++ pdp_context_delete(pctx);
+
+- gtp_hashtable_free(gtp);
+ list_del_rcu(>p->list);
+ unregister_netdevice_queue(dev, head);
+ }
+@@ -774,20 +788,6 @@ err1:
+ return -ENOMEM;
+ }
+
+-static void gtp_hashtable_free(struct gtp_dev *gtp)
+-{
+- struct pdp_ctx *pctx;
+- int i;
+-
+- for (i = 0; i < gtp->hash_size; i++)
+- hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid)
+- pdp_context_delete(pctx);
+-
+- synchronize_rcu();
+- kfree(gtp->addr_hash);
+- kfree(gtp->tid_hash);
+-}
+-
+ static struct sock *gtp_encap_enable_socket(int fd, int type,
+ struct gtp_dev *gtp)
+ {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:17 +0000
+Subject: gtp: fix wrong condition in gtp_genl_dump_pdp()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 94a6d9fb88df43f92d943c32b84ce398d50bf49f ]
+
+gtp_genl_dump_pdp() is ->dumpit() callback of GTP module and it is used
+to dump pdp contexts. It would be re-executed because of dump packet size.
+
+If dump packet size is too big, it saves current dump pointer
+(gtp interface pointer, bucket, TID value) then it restarts dump from
+last pointer.
+Current GTP code allows adding zero TID pdp context but dump code
+ignores zero TID value. So, last dump pointer will not be found.
+
+In addition, this patch adds missing rcu_read_lock() in
+gtp_genl_dump_pdp().
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 36 +++++++++++++++++++-----------------
+ 1 file changed, 19 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -38,7 +38,6 @@ struct pdp_ctx {
+ struct hlist_node hlist_addr;
+
+ union {
+- u64 tid;
+ struct {
+ u64 tid;
+ u16 flow;
+@@ -1232,43 +1231,46 @@ static int gtp_genl_dump_pdp(struct sk_b
+ struct netlink_callback *cb)
+ {
+ struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp;
++ int i, j, bucket = cb->args[0], skip = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+- struct gtp_net *gn = net_generic(net, gtp_net_id);
+- unsigned long tid = cb->args[1];
+- int i, k = cb->args[0], ret;
+ struct pdp_ctx *pctx;
++ struct gtp_net *gn;
++
++ gn = net_generic(net, gtp_net_id);
+
+ if (cb->args[4])
+ return 0;
+
++ rcu_read_lock();
+ list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) {
+ if (last_gtp && last_gtp != gtp)
+ continue;
+ else
+ last_gtp = NULL;
+
+- for (i = k; i < gtp->hash_size; i++) {
+- hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) {
+- if (tid && tid != pctx->u.tid)
+- continue;
+- else
+- tid = 0;
+-
+- ret = gtp_genl_fill_info(skb,
+- NETLINK_CB(cb->skb).portid,
+- cb->nlh->nlmsg_seq,
+- cb->nlh->nlmsg_type, pctx);
+- if (ret < 0) {
++ for (i = bucket; i < gtp->hash_size; i++) {
++ j = 0;
++ hlist_for_each_entry_rcu(pctx, >p->tid_hash[i],
++ hlist_tid) {
++ if (j >= skip &&
++ gtp_genl_fill_info(skb,
++ NETLINK_CB(cb->skb).portid,
++ cb->nlh->nlmsg_seq,
++ cb->nlh->nlmsg_type, pctx)) {
+ cb->args[0] = i;
+- cb->args[1] = pctx->u.tid;
++ cb->args[1] = j;
+ cb->args[2] = (unsigned long)gtp;
+ goto out;
+ }
++ j++;
+ }
++ skip = 0;
+ }
++ bucket = 0;
+ }
+ cb->args[4] = 1;
+ out:
++ rcu_read_unlock();
+ return skb->len;
+ }
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Wed, 11 Dec 2019 14:26:27 -0800
+Subject: hv_netvsc: Fix tx_table init in rndis_set_subchannel()
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Upstream commit c39ea5cba5a2e97fc01b78c85208bf31383b399c ]
+
+Host can provide send indirection table messages anytime after RSS is
+enabled by calling rndis_filter_set_rss_param(). So the host provided
+table values may be overwritten by the initialization in
+rndis_set_subchannel().
+
+To prevent this problem, move the tx_table initialization before calling
+rndis_filter_set_rss_param().
+
+Fixes: a6fb6aa3cfa9 ("hv_netvsc: Set tx_table to equal weight after subchannels open")
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/rndis_filter.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1165,6 +1165,9 @@ int rndis_set_subchannel(struct net_devi
+ wait_event(nvdev->subchan_open,
+ atomic_read(&nvdev->open_chn) == nvdev->num_chn);
+
++ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
++ ndev_ctx->tx_table[i] = i % nvdev->num_chn;
++
+ /* ignore failures from setting rss parameters, still have channels */
+ if (dev_info)
+ rndis_filter_set_rss_param(rdev, dev_info->rss_key);
+@@ -1174,9 +1177,6 @@ int rndis_set_subchannel(struct net_devi
+ netif_set_real_num_tx_queues(ndev, nvdev->num_chn);
+ netif_set_real_num_rx_queues(ndev, nvdev->num_chn);
+
+- for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
+- ndev_ctx->tx_table[i] = i % nvdev->num_chn;
+-
+ return 0;
+ }
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:10 +0800
+Subject: ip6_gre: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 675d76ad0ad5bf41c9a129772ef0aba8f57ea9a7 ]
+
+When we do ipv6 gre pmtu update, we will also do neigh confirm currently.
+This will cause the neigh cache be refreshed and set to REACHABLE before
+xmit.
+
+But if the remote mac address changed, e.g. device is deleted and recreated,
+we will not able to notice this and still use the old mac address as the neigh
+cache is REACHABLE.
+
+Fix this by disabling neigh confirm when doing the pmtu update.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1040,7 +1040,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true);
++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Wed, 11 Dec 2019 22:20:16 +0800
+Subject: ipv6/addrconf: only check invalid header values when NETLINK_F_STRICT_CHK is set
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 2beb6d2901a3f73106485d560c49981144aeacb1 ]
+
+In commit 4b1373de73a3 ("net: ipv6: addr: perform strict checks also for
+doit handlers") we add strict check for inet6_rtm_getaddr(). But we did
+the invalid header values check before checking if NETLINK_F_STRICT_CHK
+is set. This may break backwards compatibility if user already set the
+ifm->ifa_prefixlen, ifm->ifa_flags, ifm->ifa_scope in their netlink code.
+
+I didn't move the nlmsg_len check because I thought it's a valid check.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Fixes: 4b1373de73a3 ("net: ipv6: addr: perform strict checks also for doit handlers")
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -5231,16 +5231,16 @@ static int inet6_rtm_valid_getaddr_req(s
+ return -EINVAL;
+ }
+
++ if (!netlink_strict_get_check(skb))
++ return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
++ ifa_ipv6_policy, extack);
++
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request");
+ return -EINVAL;
+ }
+
+- if (!netlink_strict_get_check(skb))
+- return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+- ifa_ipv6_policy, extack);
+-
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
+ if (err)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Sun, 29 Dec 2019 13:40:23 +0200
+Subject: mlxsw: spectrum: Use dedicated policer for VRRP packets
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit acca789a358cc960be3937851d7de6591c79d6c2 ]
+
+Currently, VRRP packets and packets that hit exceptions during routing
+(e.g., MTU error) are policed using the same policer towards the CPU.
+This means, for example, that misconfiguration of the MTU on a routed
+interface can prevent VRRP packets from reaching the CPU, which in turn
+can cause the VRRP daemon to assume it is the Master router.
+
+Fix this by using a dedicated policer for VRRP packets.
+
+Fixes: 11566d34f895 ("mlxsw: spectrum: Add VRRP traps")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-by: Alex Veber <alexve@mellanox.com>
+Tested-by: Alex Veber <alexve@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/reg.h | 1 +
+ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 9 +++++++--
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
++++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
+@@ -5421,6 +5421,7 @@ enum mlxsw_reg_htgt_trap_group {
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
++ MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP,
+
+ __MLXSW_REG_HTGT_TRAP_GROUP_MAX,
+ MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+@@ -4398,8 +4398,8 @@ static const struct mlxsw_listener mlxsw
+ MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false),
+- MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
+- MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
++ MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false),
++ MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false),
+ /* PKT Sample trap */
+ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
+ false, SP_IP2ME, DISCARD),
+@@ -4483,6 +4483,10 @@ static int mlxsw_sp_cpu_policers_set(str
+ rate = 19 * 1024;
+ burst_size = 12;
+ break;
++ case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
++ rate = 360;
++ burst_size = 7;
++ break;
+ default:
+ continue;
+ }
+@@ -4522,6 +4526,7 @@ static int mlxsw_sp_trap_groups_set(stru
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
++ case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
+ priority = 5;
+ tc = 5;
+ break;
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Amit Cohen <amitc@mellanox.com>
+Date: Sun, 29 Dec 2019 13:40:22 +0200
+Subject: mlxsw: spectrum_router: Skip loopback RIFs during MAC validation
+
+From: Amit Cohen <amitc@mellanox.com>
+
+[ Upstream commit 314bd842d98e1035cc40b671a71e07f48420e58f ]
+
+When a router interface (RIF) is created the MAC address of the backing
+netdev is verified to have the same MSBs as existing RIFs. This is
+required in order to avoid changing existing RIF MAC addresses that all
+share the same MSBs.
+
+Loopback RIFs are special in this regard as they do not have a MAC
+address, given they are only used to loop packets from the overlay to
+the underlay.
+
+Without this change, an error is returned when trying to create a RIF
+after the creation of a GRE tunnel that is represented by a loopback
+RIF. 'rif->dev->dev_addr' points to the GRE device's local IP, which
+does not share the same MSBs as physical interfaces. Adding an IP
+address to any physical interface results in:
+
+Error: mlxsw_spectrum: All router interface MAC addresses must have the
+same prefix.
+
+Fix this by skipping loopback RIFs during MAC validation.
+
+Fixes: 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses")
+Signed-off-by: Amit Cohen <amitc@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+@@ -6985,6 +6985,9 @@ static int mlxsw_sp_router_port_check_ri
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+ rif = mlxsw_sp->router->rifs[i];
++ if (rif && rif->ops &&
++ rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB)
++ continue;
+ if (rif && rif->dev && rif->dev != dev &&
+ !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
+ mlxsw_sp->mac_mask)) {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:09 +0800
+Subject: net: add bool confirm_neigh parameter for dst_ops.update_pmtu
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit bd085ef678b2cc8c38c105673dfe8ff8f5ec0c57 ]
+
+The MTU update code is supposed to be invoked in response to real
+networking events that update the PMTU. In IPv6 PMTU update function
+__ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor
+confirmed time.
+
+But for tunnel code, it will call pmtu before xmit, like:
+ - tnl_update_pmtu()
+ - skb_dst_update_pmtu()
+ - ip6_rt_update_pmtu()
+ - __ip6_rt_update_pmtu()
+ - dst_confirm_neigh()
+
+If the tunnel remote dst mac address changed and we still do the neigh
+confirm, we will not be able to update neigh cache and ping6 remote
+will failed.
+
+So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we
+should not be invoking dst_confirm_neigh() as we have no evidence
+of successful two-way communication at this point.
+
+On the other hand it is also important to keep the neigh reachability fresh
+for TCP flows, so we cannot remove this dst_confirm_neigh() call.
+
+To fix the issue, we have to add a new bool parameter for dst_ops.update_pmtu
+to choose whether we should do neigh update or not. I will add the parameter
+in this patch and set all the callers to true to comply with the previous
+way, and fix the tunnel code one by one on later patches.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Suggested-by: David Miller <davem@davemloft.net>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 2 +-
+ include/net/dst.h | 2 +-
+ include/net/dst_ops.h | 3 ++-
+ net/bridge/br_nf_core.c | 3 ++-
+ net/decnet/dn_route.c | 6 ++++--
+ net/ipv4/inet_connection_sock.c | 2 +-
+ net/ipv4/route.c | 9 ++++++---
+ net/ipv4/xfrm4_policy.c | 5 +++--
+ net/ipv6/inet6_connection_sock.c | 2 +-
+ net/ipv6/ip6_gre.c | 2 +-
+ net/ipv6/route.c | 22 +++++++++++++++-------
+ net/ipv6/xfrm6_policy.c | 5 +++--
+ net/netfilter/ipvs/ip_vs_xmit.c | 2 +-
+ net/sctp/transport.c | 2 +-
+ 14 files changed, 42 insertions(+), 25 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -541,7 +541,7 @@ static int gtp_build_skb_ip4(struct sk_b
+ mtu = dst_mtu(&rt->dst);
+ }
+
+- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu);
++ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true);
+
+ if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
+ mtu < ntohs(iph->tot_len)) {
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -516,7 +516,7 @@ static inline void skb_dst_update_pmtu(s
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst && dst->ops->update_pmtu)
+- dst->ops->update_pmtu(dst, NULL, skb, mtu);
++ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+ }
+
+ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+--- a/include/net/dst_ops.h
++++ b/include/net/dst_ops.h
+@@ -27,7 +27,8 @@ struct dst_ops {
+ struct dst_entry * (*negative_advice)(struct dst_entry *);
+ void (*link_failure)(struct sk_buff *);
+ void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ void (*redirect)(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb);
+--- a/net/bridge/br_nf_core.c
++++ b/net/bridge/br_nf_core.c
+@@ -22,7 +22,8 @@
+ #endif
+
+ static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+--- a/net/decnet/dn_route.c
++++ b/net/decnet/dn_route.c
+@@ -110,7 +110,8 @@ static void dn_dst_ifdown(struct dst_ent
+ static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
+ static void dn_dst_link_failure(struct sk_buff *);
+ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb , u32 mtu);
++ struct sk_buff *skb , u32 mtu,
++ bool confirm_neigh);
+ static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
+@@ -251,7 +252,8 @@ static int dn_dst_gc(struct dst_ops *ops
+ * advertise to the other end).
+ */
+ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct dn_route *rt = (struct dn_route *) dst;
+ struct neighbour *n = rt->n;
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -1086,7 +1086,7 @@ struct dst_entry *inet_csk_update_pmtu(s
+ if (!dst)
+ goto out;
+ }
+- dst->ops->update_pmtu(dst, sk, NULL, mtu);
++ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
+
+ dst = __sk_dst_check(sk, 0);
+ if (!dst)
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -139,7 +139,8 @@ static unsigned int ipv4_mtu(const stru
+ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
+ static void ipv4_link_failure(struct sk_buff *skb);
+ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static void ipv4_dst_destroy(struct dst_entry *dst);
+@@ -1043,7 +1044,8 @@ static void __ip_rt_update_pmtu(struct r
+ }
+
+ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct rtable *rt = (struct rtable *) dst;
+ struct flowi4 fl4;
+@@ -2648,7 +2650,8 @@ static unsigned int ipv4_blackhole_mtu(c
+ }
+
+ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -100,12 +100,13 @@ static int xfrm4_fill_dst(struct xfrm_ds
+ }
+
+ static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+- path->ops->update_pmtu(path, sk, skb, mtu);
++ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
+ }
+
+ static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
+--- a/net/ipv6/inet6_connection_sock.c
++++ b/net/ipv6/inet6_connection_sock.c
+@@ -146,7 +146,7 @@ struct dst_entry *inet6_csk_update_pmtu(
+
+ if (IS_ERR(dst))
+ return NULL;
+- dst->ops->update_pmtu(dst, sk, NULL, mtu);
++ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
+
+ dst = inet6_csk_route_socket(sk, &fl6);
+ return IS_ERR(dst) ? NULL : dst;
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1040,7 +1040,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -95,7 +95,8 @@ static int ip6_pkt_prohibit(struct sk_b
+ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
+ static void ip6_link_failure(struct sk_buff *skb);
+ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+@@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(co
+ }
+
+ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+@@ -2695,7 +2697,8 @@ static bool rt6_cache_allowed_for_pmtu(c
+ }
+
+ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
+- const struct ipv6hdr *iph, u32 mtu)
++ const struct ipv6hdr *iph, u32 mtu,
++ bool confirm_neigh)
+ {
+ const struct in6_addr *daddr, *saddr;
+ struct rt6_info *rt6 = (struct rt6_info *)dst;
+@@ -2713,7 +2716,10 @@ static void __ip6_rt_update_pmtu(struct
+ daddr = NULL;
+ saddr = NULL;
+ }
+- dst_confirm_neigh(dst, daddr);
++
++ if (confirm_neigh)
++ dst_confirm_neigh(dst, daddr);
++
+ mtu = max_t(u32, mtu, IPV6_MIN_MTU);
+ if (mtu >= dst_mtu(dst))
+ return;
+@@ -2767,9 +2773,11 @@ out_unlock:
+ }
+
+ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+- __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
++ __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
++ confirm_neigh);
+ }
+
+ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+@@ -2788,7 +2796,7 @@ void ip6_update_pmtu(struct sk_buff *skb
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+- __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
++ __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
+ dst_release(dst);
+ }
+ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -98,12 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_ds
+ }
+
+ static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+- path->ops->update_pmtu(path, sk, skb, mtu);
++ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
+ }
+
+ static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+--- a/net/netfilter/ipvs/ip_vs_xmit.c
++++ b/net/netfilter/ipvs/ip_vs_xmit.c
+@@ -208,7 +208,7 @@ static inline void maybe_update_pmtu(int
+ struct rtable *ort = skb_rtable(skb);
+
+ if (!skb->dev && sk && sk_fullsock(sk))
+- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
++ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true);
+ }
+
+ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
+--- a/net/sctp/transport.c
++++ b/net/sctp/transport.c
+@@ -263,7 +263,7 @@ bool sctp_transport_update_pmtu(struct s
+
+ pf->af->from_sk(&addr, sk);
+ pf->to_sk_daddr(&t->ipaddr, sk);
+- dst->ops->update_pmtu(dst, sk, NULL, pmtu);
++ dst->ops->update_pmtu(dst, sk, NULL, pmtu, true);
+ pf->to_sk_daddr(&addr, sk);
+
+ dst = sctp_transport_dst_check(t);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Fri, 20 Dec 2019 11:24:21 -0800
+Subject: net: dsa: bcm_sf2: Fix IP fragment location and behavior
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 7c3125f0a6ebc17846c5908ad7d6056d66c1c426 ]
+
+The IP fragment is specified through user-defined field as the first
+bit of the first user-defined word. We were previously trying to extract
+it from the user-defined mask which could not possibly work. The ip_frag
+is also supposed to be a boolean, if we do not cast it as such, we risk
+overwriting the next fields in CFP_DATA(6) which would render the rule
+inoperative.
+
+Fixes: 7318166cacad ("net: dsa: bcm_sf2: Add support for ethtool::rxnfc")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2_cfp.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2_cfp.c
++++ b/drivers/net/dsa/bcm_sf2_cfp.c
+@@ -358,7 +358,7 @@ static int bcm_sf2_cfp_ipv4_rule_set(str
+ return -EINVAL;
+ }
+
+- ip_frag = be32_to_cpu(fs->m_ext.data[0]);
++ ip_frag = !!(be32_to_cpu(fs->h_ext.data[0]) & 1);
+
+ /* Locate the first rule available */
+ if (fs->location == RX_CLS_LOC_ANY)
+@@ -569,7 +569,7 @@ static int bcm_sf2_cfp_rule_cmp(struct b
+
+ if (rule->fs.flow_type != fs->flow_type ||
+ rule->fs.ring_cookie != fs->ring_cookie ||
+- rule->fs.m_ext.data[0] != fs->m_ext.data[0])
++ rule->fs.h_ext.data[0] != fs->h_ext.data[0])
+ continue;
+
+ switch (fs->flow_type & ~FLOW_EXT) {
+@@ -621,7 +621,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(str
+ return -EINVAL;
+ }
+
+- ip_frag = be32_to_cpu(fs->m_ext.data[0]);
++ ip_frag = !!(be32_to_cpu(fs->h_ext.data[0]) & 1);
+
+ layout = &udf_tcpip6_layout;
+ slice_num = bcm_sf2_get_slice_number(layout, 0);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vladimir Oltean <olteanv@gmail.com>
+Date: Fri, 27 Dec 2019 03:11:13 +0200
+Subject: net: dsa: sja1105: Reconcile the meaning of TPID and TPID2 for E/T and P/Q/R/S
+
+From: Vladimir Oltean <olteanv@gmail.com>
+
+[ Upstream commit 54fa49ee88138756df0fcf867cb1849904710a8c ]
+
+For first-generation switches (SJA1105E and SJA1105T):
+- TPID means C-Tag (typically 0x8100)
+- TPID2 means S-Tag (typically 0x88A8)
+
+While for the second generation switches (SJA1105P, SJA1105Q, SJA1105R,
+SJA1105S) it is the other way around:
+- TPID means S-Tag (typically 0x88A8)
+- TPID2 means C-Tag (typically 0x8100)
+
+In other words, E/T tags untagged traffic with TPID, and P/Q/R/S with
+TPID2.
+
+So the patch mentioned below fixed VLAN filtering for P/Q/R/S, but broke
+it for E/T.
+
+We strive for a common code path for all switches in the family, so just
+lie in the static config packing functions that TPID and TPID2 are at
+swapped bit offsets than they actually are, for P/Q/R/S. This will make
+both switches understand TPID to be ETH_P_8021Q and TPID2 to be
+ETH_P_8021AD. The meaning from the original E/T was chosen over P/Q/R/S
+because E/T is actually the one with public documentation available
+(UM10944.pdf).
+
+Fixes: f9a1a7646c0d ("net: dsa: sja1105: Reverse TPID and TPID2")
+Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/sja1105/sja1105_main.c | 8 ++++----
+ drivers/net/dsa/sja1105/sja1105_static_config.c | 7 +++++--
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/dsa/sja1105/sja1105_main.c
++++ b/drivers/net/dsa/sja1105/sja1105_main.c
+@@ -1560,8 +1560,8 @@ static int sja1105_vlan_filtering(struct
+
+ if (enabled) {
+ /* Enable VLAN filtering. */
+- tpid = ETH_P_8021AD;
+- tpid2 = ETH_P_8021Q;
++ tpid = ETH_P_8021Q;
++ tpid2 = ETH_P_8021AD;
+ } else {
+ /* Disable VLAN filtering. */
+ tpid = ETH_P_SJA1105;
+@@ -1570,9 +1570,9 @@ static int sja1105_vlan_filtering(struct
+
+ table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
+ general_params = table->entries;
+- /* EtherType used to identify outer tagged (S-tag) VLAN traffic */
+- general_params->tpid = tpid;
+ /* EtherType used to identify inner tagged (C-tag) VLAN traffic */
++ general_params->tpid = tpid;
++ /* EtherType used to identify outer tagged (S-tag) VLAN traffic */
+ general_params->tpid2 = tpid2;
+ /* When VLAN filtering is on, we need to at least be able to
+ * decode management traffic through the "backup plan".
+--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
++++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
+@@ -142,6 +142,9 @@ static size_t sja1105et_general_params_e
+ return size;
+ }
+
++/* TPID and TPID2 are intentionally reversed so that semantic
++ * compatibility with E/T is kept.
++ */
+ static size_t
+ sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
+ enum packing_op op)
+@@ -166,9 +169,9 @@ sja1105pqrs_general_params_entry_packing
+ sja1105_packing(buf, &entry->mirr_port, 141, 139, size, op);
+ sja1105_packing(buf, &entry->vlmarker, 138, 107, size, op);
+ sja1105_packing(buf, &entry->vlmask, 106, 75, size, op);
+- sja1105_packing(buf, &entry->tpid, 74, 59, size, op);
++ sja1105_packing(buf, &entry->tpid2, 74, 59, size, op);
+ sja1105_packing(buf, &entry->ignore2stf, 58, 58, size, op);
+- sja1105_packing(buf, &entry->tpid2, 57, 42, size, op);
++ sja1105_packing(buf, &entry->tpid, 57, 42, size, op);
+ sja1105_packing(buf, &entry->queue_ts, 41, 41, size, op);
+ sja1105_packing(buf, &entry->egrmirrvid, 40, 29, size, op);
+ sja1105_packing(buf, &entry->egrmirrpcp, 28, 26, size, op);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:12 +0800
+Subject: net/dst: add new function skb_dst_update_pmtu_no_confirm
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 07dc35c6e3cc3c001915d05f5bf21f80a39a0970 ]
+
+Add a new function skb_dst_update_pmtu_no_confirm() for callers who need
+to update the PMTU but should not confirm the neighbour.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -519,6 +519,15 @@ static inline void skb_dst_update_pmtu(s
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+ }
+
++/* update dst pmtu but not do neighbor confirm */
++static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
++{
++ struct dst_entry *dst = skb_dst(skb);
++
++ if (dst && dst->ops->update_pmtu)
++ dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
++}
++
+ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+ struct dst_entry *encap_dst,
+ int headroom)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:16 +0800
+Subject: net/dst: do not confirm neighbor for vxlan and geneve pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit f081042d128a0c7acbd67611def62e1b52e2d294 ]
+
+When doing an IPv6 tunnel PMTU update that calls __ip6_rt_update_pmtu() in
+the end, we should not call dst_confirm_neigh() as there is no two-way
+communication.
+
+So disable the neigh confirm for vxlan and geneve pmtu update.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path")
+Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path")
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Tested-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -535,7 +535,7 @@ static inline void skb_tunnel_check_pmtu
+ u32 encap_mtu = dst_mtu(encap_dst);
+
+ if (skb->len > encap_mtu - headroom)
+- skb_dst_update_pmtu(skb, encap_mtu - headroom);
++ skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom);
+ }
+
+ #endif /* _NET_DST_H */
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:32 PM CET
+From: Netanel Belgazal <netanel@amazon.com>
+Date: Tue, 10 Dec 2019 11:27:44 +0000
+Subject: net: ena: fix napi handler misbehavior when the napi budget is zero
+
+From: Netanel Belgazal <netanel@amazon.com>
+
+[ Upstream commit 24dee0c7478d1a1e00abdf5625b7f921467325dc ]
+
+In netpoll the napi handler could be called with budget equal to zero.
+Current ENA napi handler doesn't take that into consideration.
+
+The napi handler handles Rx packets in a do-while loop.
+Currently, the budget check happens only after decrementing the
+budget, therefore the napi handler, in rare cases, could run over
+MAX_INT packets.
+
+In addition to that, this moves all budget related variables to int
+calculation and stop mixing u32 to avoid ambiguity
+
+Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
+Signed-off-by: Netanel Belgazal <netanel@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -1238,8 +1238,8 @@ static int ena_io_poll(struct napi_struc
+ struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+ struct ena_ring *tx_ring, *rx_ring;
+
+- u32 tx_work_done;
+- u32 rx_work_done;
++ int tx_work_done;
++ int rx_work_done = 0;
+ int tx_budget;
+ int napi_comp_call = 0;
+ int ret;
+@@ -1256,7 +1256,11 @@ static int ena_io_poll(struct napi_struc
+ }
+
+ tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
+- rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
++ /* On netpoll the budget is zero and the handler should only clean the
++ * tx completions.
++ */
++ if (likely(budget))
++ rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+
+ /* If the device is about to reset or down, avoid unmask
+ * the interrupt and return 0 so NAPI won't reschedule
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Tue, 10 Dec 2019 22:33:05 +0000
+Subject: net: marvell: mvpp2: phylink requires the link interrupt
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit f3f2364ea14d1cf6bf966542f31eadcf178f1577 ]
+
+phylink requires the MAC to report when its link status changes when
+operating in inband modes. Failure to report link status changes
+means that phylink has no idea when the link events happen, which
+results in either the network interface's carrier remaining up or
+remaining permanently down.
+
+For example, with a fiber module, if the interface is brought up and
+link is initially established, taking the link down at the far end
+will cut the optical power. The SFP module's LOS asserts, we
+deactivate the link, and the network interface reports no carrier.
+
+When the far end is brought back up, the SFP module's LOS deasserts,
+but the MAC may be slower to establish link. If this happens (which
+in my tests is a certainty) then phylink never hears that the MAC
+has established link with the far end, and the network interface is
+stuck reporting no carrier. This means the interface is
+non-functional.
+
+Avoiding the link interrupt when we have phylink is basically not
+an option, so remove the !port->phylink from the test.
+
+Fixes: 4bb043262878 ("net: mvpp2: phylink support")
+Tested-by: Sven Auhagen <sven.auhagen@voleatech.de>
+Tested-by: Antoine Tenart <antoine.tenart@bootlin.com>
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+@@ -3674,7 +3674,7 @@ static int mvpp2_open(struct net_device
+ valid = true;
+ }
+
+- if (priv->hw_version == MVPP22 && port->link_irq && !port->phylink) {
++ if (priv->hw_version == MVPP22 && port->link_irq) {
+ err = request_irq(port->link_irq, mvpp2_link_status_isr, 0,
+ dev->name, port);
+ if (err) {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+Date: Thu, 26 Dec 2019 10:41:56 +0200
+Subject: net/mlxfw: Fix out-of-memory error in mfa2 flash burning
+
+From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+
+[ Upstream commit a5bcd72e054aabb93ddc51ed8cde36a5bfc50271 ]
+
+The burning process requires to perform internal allocations of large
+chunks of memory. This memory doesn't need to be contiguous and can be
+safely allocated by vzalloc() instead of kzalloc(). This patch changes
+such allocation to avoid possible out-of-memory failure.
+
+Fixes: 410ed13cae39 ("Add the mlxfw module for Mellanox firmware flash process")
+Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+Reviewed-by: Aya Levin <ayal@mellanox.com>
+Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
+Tested-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
++++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
+@@ -6,6 +6,7 @@
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/netlink.h>
++#include <linux/vmalloc.h>
+ #include <linux/xz.h>
+ #include "mlxfw_mfa2.h"
+ #include "mlxfw_mfa2_file.h"
+@@ -548,7 +549,7 @@ mlxfw_mfa2_file_component_get(const stru
+ comp_size = be32_to_cpu(comp->size);
+ comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len;
+
+- comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL);
++ comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size);
+ if (!comp_data)
+ return ERR_PTR(-ENOMEM);
+ comp_data->comp.data_size = comp_size;
+@@ -570,7 +571,7 @@ mlxfw_mfa2_file_component_get(const stru
+ comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len;
+ return &comp_data->comp;
+ err_out:
+- kfree(comp_data);
++ vfree(comp_data);
+ return ERR_PTR(err);
+ }
+
+@@ -579,7 +580,7 @@ void mlxfw_mfa2_file_component_put(struc
+ const struct mlxfw_mfa2_comp_data *comp_data;
+
+ comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp);
+- kfree(comp_data);
++ vfree(comp_data);
+ }
+
+ void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Madalin Bucur <madalin.bucur@oss.nxp.com>
+Date: Mon, 23 Dec 2019 10:06:10 +0200
+Subject: net: phy: aquantia: add suspend / resume ops for AQR105
+
+From: Madalin Bucur <madalin.bucur@oss.nxp.com>
+
+[ Upstream commit 1c93fb45761e79b3c00080e71523886cefaf351c ]
+
+The suspend/resume code for AQR107 works on AQR105 too.
+This patch fixes issues with the partner not seeing the link down
+when the interface using AQR105 is brought down.
+
+Fixes: bee8259dd31f ("net: phy: add driver for aquantia phy")
+Signed-off-by: Madalin Bucur <madalin.bucur@oss.nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/aquantia_main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/phy/aquantia_main.c
++++ b/drivers/net/phy/aquantia_main.c
+@@ -627,6 +627,8 @@ static struct phy_driver aqr_driver[] =
+ .config_intr = aqr_config_intr,
+ .ack_interrupt = aqr_ack_interrupt,
+ .read_status = aqr_read_status,
++ .suspend = aqr107_suspend,
++ .resume = aqr107_resume,
+ },
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR106),
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Fri, 13 Dec 2019 10:06:30 +0000
+Subject: net: phylink: fix interface passed to mac_link_up
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit 9b2079c046a9d6c9c73a4ec33816678565ee01f3 ]
+
+A mismerge between the following two commits:
+
+c678726305b9 ("net: phylink: ensure consistent phy interface mode")
+27755ff88c0e ("net: phylink: Add phylink_mac_link_{up, down} wrapper functions")
+
+resulted in the wrong interface being passed to the mac_link_up()
+function. Fix this up.
+
+Fixes: b4b12b0d2f02 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phylink.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/phy/phylink.c
++++ b/drivers/net/phy/phylink.c
+@@ -444,8 +444,7 @@ static void phylink_mac_link_up(struct p
+
+ pl->cur_interface = link_state.interface;
+ pl->ops->mac_link_up(pl->config, pl->link_an_mode,
+- pl->phy_state.interface,
+- pl->phydev);
++ pl->cur_interface, pl->phydev);
+
+ if (ndev)
+ netif_carrier_on(ndev);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Shmulik Ladkani <sladkani@proofpoint.com>
+Date: Wed, 25 Dec 2019 10:51:01 +0200
+Subject: net/sched: act_mirred: Pull mac prior redir to non mac_header_xmit device
+
+From: Shmulik Ladkani <sladkani@proofpoint.com>
+
+[ Upstream commit 70cf3dc7313207816255b9acb0dffb19dae78144 ]
+
+There's no skb_pull performed when a mirred action is set at egress of a
+mac device, with a target device/action that expects skb->data to point
+at the network header.
+
+As a result, either the target device is erroneously given an skb with
+data pointing to the mac (egress case), or the net stack receives the
+skb with data pointing to the mac (ingress case).
+
+E.g:
+ # tc qdisc add dev eth9 root handle 1: prio
+ # tc filter add dev eth9 parent 1: prio 9 protocol ip handle 9 basic \
+ action mirred egress redirect dev tun0
+
+ (tun0 is a tun device. result: tun0 erroneously gets the eth header
+ instead of the iph)
+
+Revise the push/pull logic of tcf_mirred_act() to not rely on the
+skb_at_tc_ingress() vs tcf_mirred_act_wants_ingress() comparison, as it
+does not cover all "pull" cases.
+
+Instead, calculate whether the required action on the target device
+requires the data to point at the network header, and compare this to
+whether skb->data points to network header - and make the push/pull
+adjustments as necessary.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Shmulik Ladkani <sladkani@proofpoint.com>
+Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_mirred.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -219,8 +219,10 @@ static int tcf_mirred_act(struct sk_buff
+ bool use_reinsert;
+ bool want_ingress;
+ bool is_redirect;
++ bool expects_nh;
+ int m_eaction;
+ int mac_len;
++ bool at_nh;
+
+ rec_level = __this_cpu_inc_return(mirred_rec_level);
+ if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
+@@ -261,19 +263,19 @@ static int tcf_mirred_act(struct sk_buff
+ goto out;
+ }
+
+- /* If action's target direction differs than filter's direction,
+- * and devices expect a mac header on xmit, then mac push/pull is
+- * needed.
+- */
+ want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
+- if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
+- if (!skb_at_tc_ingress(skb)) {
+- /* caught at egress, act ingress: pull mac */
+- mac_len = skb_network_header(skb) - skb_mac_header(skb);
++
++ expects_nh = want_ingress || !m_mac_header_xmit;
++ at_nh = skb->data == skb_network_header(skb);
++ if (at_nh != expects_nh) {
++ mac_len = skb_at_tc_ingress(skb) ? skb->mac_len :
++ skb_network_header(skb) - skb_mac_header(skb);
++ if (expects_nh) {
++ /* target device/action expect data at nh */
+ skb_pull_rcsum(skb2, mac_len);
+ } else {
+- /* caught at ingress, act egress: push mac */
+- skb_push_rcsum(skb2, skb->mac_len);
++ /* target device/action expect data at mac */
++ skb_push_rcsum(skb2, mac_len);
+ }
+ }
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Sat, 28 Dec 2019 16:36:58 +0100
+Subject: net/sched: add delete_empty() to filters and use it in cls_flower
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit a5b72a083da197b493c7ed1e5730d62d3199f7d6 ]
+
+Revert "net/sched: cls_u32: fix refcount leak in the error path of
+u32_change()", and fix the u32 refcount leak in a more generic way that
+preserves the semantic of rule dumping.
+On tc filters that don't support lockless insertion/removal, there is no
+need to guard against concurrent insertion when a removal is in progress.
+Therefore, for most of them we can avoid a full walk() when deleting, and
+just decrease the refcount, like it was done on older Linux kernels.
+This fixes situations where walk() was wrongly detecting a non-empty
+filter, like it happened with cls_u32 in the error path of change(), thus
+leading to failures in the following tdc selftests:
+
+ 6aa7: (filter, u32) Add/Replace u32 with source match and invalid indev
+ 6658: (filter, u32) Add/Replace u32 with custom hash table and invalid handle
+ 74c2: (filter, u32) Add/Replace u32 filter with invalid hash table id
+
+On cls_flower, and on (future) lockless filters, this check is necessary:
+move all the check_empty() logic in a callback so that each filter
+can have its own implementation. For cls_flower, it's sufficient to check
+if no IDRs have been allocated.
+
+This reverts commit 275c44aa194b7159d1191817b20e076f55f0e620.
+
+Changes since v1:
+ - document the need for delete_empty() when TCF_PROTO_OPS_DOIT_UNLOCKED
+ is used, thanks to Vlad Buslov
+ - implement delete_empty() without doing fl_walk(), thanks to Vlad Buslov
+ - squash revert and new fix in a single patch, to be nice with bisect
+ tests that run tdc on u32 filter, thanks to Dave Miller
+
+Fixes: 275c44aa194b ("net/sched: cls_u32: fix refcount leak in the error path of u32_change()")
+Fixes: 6676d5e416ee ("net: sched: set dedicated tcf_walker flag when tp is empty")
+Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Suggested-by: Vlad Buslov <vladbu@mellanox.com>
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
+Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sch_generic.h | 5 +++++
+ net/sched/cls_api.c | 31 +++++--------------------------
+ net/sched/cls_flower.c | 12 ++++++++++++
+ 3 files changed, 22 insertions(+), 26 deletions(-)
+
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -308,6 +308,7 @@ struct tcf_proto_ops {
+ int (*delete)(struct tcf_proto *tp, void *arg,
+ bool *last, bool rtnl_held,
+ struct netlink_ext_ack *);
++ bool (*delete_empty)(struct tcf_proto *tp);
+ void (*walk)(struct tcf_proto *tp,
+ struct tcf_walker *arg, bool rtnl_held);
+ int (*reoffload)(struct tcf_proto *tp, bool add,
+@@ -336,6 +337,10 @@ struct tcf_proto_ops {
+ int flags;
+ };
+
++/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags
++ * are expected to implement tcf_proto_ops->delete_empty(), otherwise race
++ * conditions can occur when filters are inserted/deleted simultaneously.
++ */
+ enum tcf_proto_ops_flags {
+ TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
+ };
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -308,33 +308,12 @@ static void tcf_proto_put(struct tcf_pro
+ tcf_proto_destroy(tp, rtnl_held, true, extack);
+ }
+
+-static int walker_check_empty(struct tcf_proto *tp, void *fh,
+- struct tcf_walker *arg)
++static bool tcf_proto_check_delete(struct tcf_proto *tp)
+ {
+- if (fh) {
+- arg->nonempty = true;
+- return -1;
+- }
+- return 0;
+-}
+-
+-static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
+-{
+- struct tcf_walker walker = { .fn = walker_check_empty, };
+-
+- if (tp->ops->walk) {
+- tp->ops->walk(tp, &walker, rtnl_held);
+- return !walker.nonempty;
+- }
+- return true;
+-}
++ if (tp->ops->delete_empty)
++ return tp->ops->delete_empty(tp);
+
+-static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
+-{
+- spin_lock(&tp->lock);
+- if (tcf_proto_is_empty(tp, rtnl_held))
+- tp->deleting = true;
+- spin_unlock(&tp->lock);
++ tp->deleting = true;
+ return tp->deleting;
+ }
+
+@@ -1751,7 +1730,7 @@ static void tcf_chain_tp_delete_empty(st
+ * concurrently.
+ * Mark tp for deletion if it is empty.
+ */
+- if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
++ if (!tp_iter || !tcf_proto_check_delete(tp)) {
+ mutex_unlock(&chain->filter_chain_lock);
+ return;
+ }
+--- a/net/sched/cls_flower.c
++++ b/net/sched/cls_flower.c
+@@ -2519,6 +2519,17 @@ static void fl_bind_class(void *fh, u32
+ f->res.class = cl;
+ }
+
++static bool fl_delete_empty(struct tcf_proto *tp)
++{
++ struct cls_fl_head *head = fl_head_dereference(tp);
++
++ spin_lock(&tp->lock);
++ tp->deleting = idr_is_empty(&head->handle_idr);
++ spin_unlock(&tp->lock);
++
++ return tp->deleting;
++}
++
+ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
+ .kind = "flower",
+ .classify = fl_classify,
+@@ -2528,6 +2539,7 @@ static struct tcf_proto_ops cls_fl_ops _
+ .put = fl_put,
+ .change = fl_change,
+ .delete = fl_delete,
++ .delete_empty = fl_delete_empty,
+ .walk = fl_walk,
+ .reoffload = fl_reoffload,
+ .hw_add = fl_hw_add,
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Date: Thu, 26 Dec 2019 20:01:01 +0100
+Subject: net: stmmac: dwmac-meson8b: Fix the RGMII TX delay on Meson8b/8m2 SoCs
+
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+[ Upstream commit bd6f48546b9cb7a785344fc78058c420923d7ed8 ]
+
+GXBB and newer SoCs use the fixed FCLK_DIV2 (1GHz) clock as input for
+the m250_sel clock. Meson8b and Meson8m2 use MPLL2 instead, whose rate
+can be adjusted at runtime.
+
+So far we have been running MPLL2 with ~250MHz (and the internal
+m250_div with value 1), which worked enough that we could transfer data
+with an TX delay of 4ns. Unfortunately there is high packet loss with
+an RGMII PHY when transferring data (receiving data works fine though).
+Odroid-C1's u-boot is running with a TX delay of only 2ns as well as
+the internal m250_div set to 2 - no lost (TX) packets can be observed
+with that setting in u-boot.
+
+Manual testing has shown that the TX packet loss goes away when using
+the following settings in Linux (the vendor kernel uses the same
+settings):
+- MPLL2 clock set to ~500MHz
+- m250_div set to 2
+- TX delay set to 2ns on the MAC side
+
+Update the m250_div divider settings to only accept dividers greater
+than or equal to 2 to fix the TX delay generated by the MAC.
+
+iperf3 results before the change:
+[ ID] Interval Transfer Bitrate Retr
+[ 5] 0.00-10.00 sec 182 MBytes 153 Mbits/sec 514 sender
+[ 5] 0.00-10.00 sec 182 MBytes 152 Mbits/sec receiver
+
+iperf3 results after the change (including an updated TX delay of 2ns):
+[ ID] Interval Transfer Bitrate Retr Cwnd
+[ 5] 0.00-10.00 sec 927 MBytes 778 Mbits/sec 0 sender
+[ 5] 0.00-10.01 sec 927 MBytes 777 Mbits/sec receiver
+
+Fixes: 4f6a71b84e1afd ("net: stmmac: dwmac-meson8b: fix internal RGMII clock configuration")
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+@@ -112,6 +112,14 @@ static int meson8b_init_rgmii_tx_clk(str
+ struct device *dev = dwmac->dev;
+ const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
+ struct meson8b_dwmac_clk_configs *clk_configs;
++ static const struct clk_div_table div_table[] = {
++ { .div = 2, .val = 2, },
++ { .div = 3, .val = 3, },
++ { .div = 4, .val = 4, },
++ { .div = 5, .val = 5, },
++ { .div = 6, .val = 6, },
++ { .div = 7, .val = 7, },
++ };
+
+ clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
+ if (!clk_configs)
+@@ -146,9 +154,9 @@ static int meson8b_init_rgmii_tx_clk(str
+ clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
+ clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
+- clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+- CLK_DIVIDER_ALLOW_ZERO |
+- CLK_DIVIDER_ROUND_CLOSEST;
++ clk_configs->m250_div.table = div_table;
++ clk_configs->m250_div.flags = CLK_DIVIDER_ALLOW_ZERO |
++ CLK_DIVIDER_ROUND_CLOSEST;
+ clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
+ &clk_divider_ops,
+ &clk_configs->m250_div.hw);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 23 Dec 2019 11:13:24 -0800
+Subject: net_sched: sch_fq: properly set sk->sk_pacing_status
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit bb3d0b8bf5be61ab1d6f472c43cbf34de17e796b ]
+
+If fq_classify() recycles a struct fq_flow because
+a socket structure has been reallocated, we do not
+set sk->sk_pacing_status immediately, but later if the
+flow becomes detached.
+
+This means that any flow requiring pacing (BBR, or SO_MAX_PACING_RATE)
+might fallback to TCP internal pacing, which requires a per-socket
+high resolution timer, and therefore more cpu cycles.
+
+Fixes: 218af599fa63 ("tcp: internal implementation for pacing")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Soheil Hassas Yeganeh <soheil@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_fq.c | 17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+--- a/net/sched/sch_fq.c
++++ b/net/sched/sch_fq.c
+@@ -301,6 +301,9 @@ static struct fq_flow *fq_classify(struc
+ f->socket_hash != sk->sk_hash)) {
+ f->credit = q->initial_quantum;
+ f->socket_hash = sk->sk_hash;
++ if (q->rate_enable)
++ smp_store_release(&sk->sk_pacing_status,
++ SK_PACING_FQ);
+ if (fq_flow_is_throttled(f))
+ fq_flow_unset_throttled(q, f);
+ f->time_next_packet = 0ULL;
+@@ -322,8 +325,12 @@ static struct fq_flow *fq_classify(struc
+
+ fq_flow_set_detached(f);
+ f->sk = sk;
+- if (skb->sk == sk)
++ if (skb->sk == sk) {
+ f->socket_hash = sk->sk_hash;
++ if (q->rate_enable)
++ smp_store_release(&sk->sk_pacing_status,
++ SK_PACING_FQ);
++ }
+ f->credit = q->initial_quantum;
+
+ rb_link_node(&f->fq_node, parent, p);
+@@ -428,17 +435,9 @@ static int fq_enqueue(struct sk_buff *sk
+ f->qlen++;
+ qdisc_qstats_backlog_inc(sch, skb);
+ if (fq_flow_is_detached(f)) {
+- struct sock *sk = skb->sk;
+-
+ fq_flow_add_tail(&q->new_flows, f);
+ if (time_after(jiffies, f->age + q->flow_refill_delay))
+ f->credit = max_t(u32, f->credit, q->quantum);
+- if (sk && q->rate_enable) {
+- if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
+- SK_PACING_FQ))
+- smp_store_release(&sk->sk_pacing_status,
+- SK_PACING_FQ);
+- }
+ q->inactive_flows--;
+ }
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Vladis Dronov <vdronov@redhat.com>
+Date: Fri, 27 Dec 2019 03:26:27 +0100
+Subject: ptp: fix the race between the release of ptp_clock and cdev
+
+From: Vladis Dronov <vdronov@redhat.com>
+
+[ Upstream commit a33121e5487b424339636b25c35d3a180eaa5f5e ]
+
+In a case when a ptp chardev (like /dev/ptp0) is open but an underlying
+device is removed, closing this file leads to a race. This reproduces
+easily in a kvm virtual machine:
+
+ts# cat openptp0.c
+int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); }
+ts# uname -r
+5.5.0-rc3-46cf053e
+ts# cat /proc/cmdline
+... slub_debug=FZP
+ts# modprobe ptp_kvm
+ts# ./openptp0 &
+[1] 670
+opened /dev/ptp0, sleeping 10s...
+ts# rmmod ptp_kvm
+ts# ls /dev/ptp*
+ls: cannot access '/dev/ptp*': No such file or directory
+ts# ...woken up
+[ 48.010809] general protection fault: 0000 [#1] SMP
+[ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25
+[ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
+[ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80
+[ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202
+[ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0
+[ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b
+[ 48.019470] ... ^^^ a slub poison
+[ 48.023854] Call Trace:
+[ 48.024050] __fput+0x21f/0x240
+[ 48.024288] task_work_run+0x79/0x90
+[ 48.024555] do_exit+0x2af/0xab0
+[ 48.024799] ? vfs_write+0x16a/0x190
+[ 48.025082] do_group_exit+0x35/0x90
+[ 48.025387] __x64_sys_exit_group+0xf/0x10
+[ 48.025737] do_syscall_64+0x3d/0x130
+[ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[ 48.026479] RIP: 0033:0x7f53b12082f6
+[ 48.026792] ...
+[ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm]
+[ 48.045001] Fixing recursive fault but reboot is needed!
+
+This happens in:
+
+static void __fput(struct file *file)
+{ ...
+ if (file->f_op->release)
+ file->f_op->release(inode, file); <<< cdev is kfree'd here
+ if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
+ !(mode & FMODE_PATH))) {
+ cdev_put(inode->i_cdev); <<< cdev fields are accessed here
+
+Namely:
+
+__fput()
+ posix_clock_release()
+ kref_put(&clk->kref, delete_clock) <<< the last reference
+ delete_clock()
+ delete_ptp_clock()
+ kfree(ptp) <<< cdev is embedded in ptp
+ cdev_put
+ module_put(p->owner) <<< *p is kfree'd, bang!
+
+Here cdev is embedded in posix_clock which is embedded in ptp_clock.
+The race happens because ptp_clock's lifetime is controlled by two
+refcounts: kref and cdev.kobj in posix_clock. This is wrong.
+
+Make ptp_clock's sysfs device a parent of cdev with cdev_device_add()
+created especially for such cases. This way the parent device with its
+ptp_clock is not released until all references to the cdev are released.
+This adds a requirement that an initialized but not exposed struct
+device should be provided to posix_clock_register() by a caller instead
+of a simple dev_t.
+
+This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix
+the race between the release of watchdog_core_data and cdev"). See
+details of the implementation in the commit 233ed09d7fda ("chardev: add
+helper function to register char devs with a struct device").
+
+Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u
+Analyzed-by: Stephen Johnston <sjohnsto@redhat.com>
+Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com>
+Signed-off-by: Vladis Dronov <vdronov@redhat.com>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ptp/ptp_clock.c | 31 ++++++++++++++-----------------
+ drivers/ptp/ptp_private.h | 2 +-
+ include/linux/posix-clock.h | 19 +++++++++++--------
+ kernel/time/posix-clock.c | 31 +++++++++++++------------------
+ 4 files changed, 39 insertions(+), 44 deletions(-)
+
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -166,9 +166,9 @@ static struct posix_clock_operations ptp
+ .read = ptp_read,
+ };
+
+-static void delete_ptp_clock(struct posix_clock *pc)
++static void ptp_clock_release(struct device *dev)
+ {
+- struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
++ struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
+
+ mutex_destroy(&ptp->tsevq_mux);
+ mutex_destroy(&ptp->pincfg_mux);
+@@ -213,7 +213,6 @@ struct ptp_clock *ptp_clock_register(str
+ }
+
+ ptp->clock.ops = ptp_clock_ops;
+- ptp->clock.release = delete_ptp_clock;
+ ptp->info = info;
+ ptp->devid = MKDEV(major, index);
+ ptp->index = index;
+@@ -236,15 +235,6 @@ struct ptp_clock *ptp_clock_register(str
+ if (err)
+ goto no_pin_groups;
+
+- /* Create a new device in our class. */
+- ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid,
+- ptp, ptp->pin_attr_groups,
+- "ptp%d", ptp->index);
+- if (IS_ERR(ptp->dev)) {
+- err = PTR_ERR(ptp->dev);
+- goto no_device;
+- }
+-
+ /* Register a new PPS source. */
+ if (info->pps) {
+ struct pps_source_info pps;
+@@ -260,8 +250,18 @@ struct ptp_clock *ptp_clock_register(str
+ }
+ }
+
+- /* Create a posix clock. */
+- err = posix_clock_register(&ptp->clock, ptp->devid);
++ /* Initialize a new device of our class in our clock structure. */
++ device_initialize(&ptp->dev);
++ ptp->dev.devt = ptp->devid;
++ ptp->dev.class = ptp_class;
++ ptp->dev.parent = parent;
++ ptp->dev.groups = ptp->pin_attr_groups;
++ ptp->dev.release = ptp_clock_release;
++ dev_set_drvdata(&ptp->dev, ptp);
++ dev_set_name(&ptp->dev, "ptp%d", ptp->index);
++
++ /* Create a posix clock and link it to the device. */
++ err = posix_clock_register(&ptp->clock, &ptp->dev);
+ if (err) {
+ pr_err("failed to create posix clock\n");
+ goto no_clock;
+@@ -273,8 +273,6 @@ no_clock:
+ if (ptp->pps_source)
+ pps_unregister_source(ptp->pps_source);
+ no_pps:
+- device_destroy(ptp_class, ptp->devid);
+-no_device:
+ ptp_cleanup_pin_groups(ptp);
+ no_pin_groups:
+ if (ptp->kworker)
+@@ -304,7 +302,6 @@ int ptp_clock_unregister(struct ptp_cloc
+ if (ptp->pps_source)
+ pps_unregister_source(ptp->pps_source);
+
+- device_destroy(ptp_class, ptp->devid);
+ ptp_cleanup_pin_groups(ptp);
+
+ posix_clock_unregister(&ptp->clock);
+--- a/drivers/ptp/ptp_private.h
++++ b/drivers/ptp/ptp_private.h
+@@ -28,7 +28,7 @@ struct timestamp_event_queue {
+
+ struct ptp_clock {
+ struct posix_clock clock;
+- struct device *dev;
++ struct device dev;
+ struct ptp_clock_info *info;
+ dev_t devid;
+ int index; /* index into clocks.map */
+--- a/include/linux/posix-clock.h
++++ b/include/linux/posix-clock.h
+@@ -69,29 +69,32 @@ struct posix_clock_operations {
+ *
+ * @ops: Functional interface to the clock
+ * @cdev: Character device instance for this clock
+- * @kref: Reference count.
++ * @dev: Pointer to the clock's device.
+ * @rwsem: Protects the 'zombie' field from concurrent access.
+ * @zombie: If 'zombie' is true, then the hardware has disappeared.
+- * @release: A function to free the structure when the reference count reaches
+- * zero. May be NULL if structure is statically allocated.
+ *
+ * Drivers should embed their struct posix_clock within a private
+ * structure, obtaining a reference to it during callbacks using
+ * container_of().
++ *
++ * Drivers should supply an initialized but not exposed struct device
++ * to posix_clock_register(). It is used to manage lifetime of the
++ * driver's private structure. It's 'release' field should be set to
++ * a release function for this private structure.
+ */
+ struct posix_clock {
+ struct posix_clock_operations ops;
+ struct cdev cdev;
+- struct kref kref;
++ struct device *dev;
+ struct rw_semaphore rwsem;
+ bool zombie;
+- void (*release)(struct posix_clock *clk);
+ };
+
+ /**
+ * posix_clock_register() - register a new clock
+- * @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
+- * @devid: Allocated device id
++ * @clk: Pointer to the clock. Caller must provide 'ops' field
++ * @dev: Pointer to the initialized device. Caller must provide
++ * 'release' field
+ *
+ * A clock driver calls this function to register itself with the
+ * clock device subsystem. If 'clk' points to dynamically allocated
+@@ -100,7 +103,7 @@ struct posix_clock {
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+-int posix_clock_register(struct posix_clock *clk, dev_t devid);
++int posix_clock_register(struct posix_clock *clk, struct device *dev);
+
+ /**
+ * posix_clock_unregister() - unregister a clock
+--- a/kernel/time/posix-clock.c
++++ b/kernel/time/posix-clock.c
+@@ -14,8 +14,6 @@
+
+ #include "posix-timers.h"
+
+-static void delete_clock(struct kref *kref);
+-
+ /*
+ * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
+ */
+@@ -125,7 +123,7 @@ static int posix_clock_open(struct inode
+ err = 0;
+
+ if (!err) {
+- kref_get(&clk->kref);
++ get_device(clk->dev);
+ fp->private_data = clk;
+ }
+ out:
+@@ -141,7 +139,7 @@ static int posix_clock_release(struct in
+ if (clk->ops.release)
+ err = clk->ops.release(clk);
+
+- kref_put(&clk->kref, delete_clock);
++ put_device(clk->dev);
+
+ fp->private_data = NULL;
+
+@@ -161,38 +159,35 @@ static const struct file_operations posi
+ #endif
+ };
+
+-int posix_clock_register(struct posix_clock *clk, dev_t devid)
++int posix_clock_register(struct posix_clock *clk, struct device *dev)
+ {
+ int err;
+
+- kref_init(&clk->kref);
+ init_rwsem(&clk->rwsem);
+
+ cdev_init(&clk->cdev, &posix_clock_file_operations);
++ err = cdev_device_add(&clk->cdev, dev);
++ if (err) {
++ pr_err("%s unable to add device %d:%d\n",
++ dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt));
++ return err;
++ }
+ clk->cdev.owner = clk->ops.owner;
+- err = cdev_add(&clk->cdev, devid, 1);
++ clk->dev = dev;
+
+- return err;
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(posix_clock_register);
+
+-static void delete_clock(struct kref *kref)
+-{
+- struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
+-
+- if (clk->release)
+- clk->release(clk);
+-}
+-
+ void posix_clock_unregister(struct posix_clock *clk)
+ {
+- cdev_del(&clk->cdev);
++ cdev_device_del(&clk->cdev, clk->dev);
+
+ down_write(&clk->rwsem);
+ clk->zombie = true;
+ up_write(&clk->rwsem);
+
+- kref_put(&clk->kref, delete_clock);
++ put_device(clk->dev);
+ }
+ EXPORT_SYMBOL_GPL(posix_clock_unregister);
+
uaccess-disallow-int_max-copy-sizes.patch
drm-limit-to-int_max-in-create_blob-ioctl.patch
xfs-fix-mount-failure-crash-on-invalid-iclog-memory-access.patch
+cxgb4-cxgb4vf-fix-flow-control-display-for-auto-negotiation.patch
+net-dsa-bcm_sf2-fix-ip-fragment-location-and-behavior.patch
+net-mlxfw-fix-out-of-memory-error-in-mfa2-flash-burning.patch
+net-phy-aquantia-add-suspend-resume-ops-for-aqr105.patch
+net-sched-act_mirred-pull-mac-prior-redir-to-non-mac_header_xmit-device.patch
+net-sched-add-delete_empty-to-filters-and-use-it-in-cls_flower.patch
+net_sched-sch_fq-properly-set-sk-sk_pacing_status.patch
+net-stmmac-dwmac-meson8b-fix-the-rgmii-tx-delay-on-meson8b-8m2-socs.patch
+ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch
+tcp-fix-highest_sack-and-highest_sack_seq.patch
+udp-fix-integer-overflow-while-computing-available-space-in-sk_rcvbuf.patch
+bnxt_en-fix-msix-request-logic-for-rdma-driver.patch
+bnxt_en-free-context-memory-in-the-open-path-if-firmware-has-been-reset.patch
+bnxt_en-return-error-if-fw-returns-more-data-than-dump-length.patch
+bnxt_en-fix-bp-fw_health-allocation-and-free-logic.patch
+bnxt_en-remove-unnecessary-null-checks-for-fw_health.patch
+bnxt_en-fix-the-logic-that-creates-the-health-reporters.patch
+bnxt_en-add-missing-devlink-health-reporters-for-vfs.patch
+mlxsw-spectrum_router-skip-loopback-rifs-during-mac-validation.patch
+mlxsw-spectrum-use-dedicated-policer-for-vrrp-packets.patch
+net-add-bool-confirm_neigh-parameter-for-dst_ops.update_pmtu.patch
+ip6_gre-do-not-confirm-neighbor-when-do-pmtu-update.patch
+gtp-do-not-confirm-neighbor-when-do-pmtu-update.patch
+net-dst-add-new-function-skb_dst_update_pmtu_no_confirm.patch
+tunnel-do-not-confirm-neighbor-when-do-pmtu-update.patch
+vti-do-not-confirm-neighbor-when-do-pmtu-update.patch
+sit-do-not-confirm-neighbor-when-do-pmtu-update.patch
+net-dst-do-not-confirm-neighbor-for-vxlan-and-geneve-pmtu-update.patch
+net-dsa-sja1105-reconcile-the-meaning-of-tpid-and-tpid2-for-e-t-and-p-q-r-s.patch
+net-marvell-mvpp2-phylink-requires-the-link-interrupt.patch
+gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch
+gtp-avoid-zero-size-hashtable.patch
+bonding-fix-active-backup-transition-after-link-failure.patch
+tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch
+tcp-dccp-fix-possible-race-__inet_lookup_established.patch
+hv_netvsc-fix-tx_table-init-in-rndis_set_subchannel.patch
+gtp-fix-an-use-after-free-in-ipv4_pdp_find.patch
+gtp-do-not-allow-adding-duplicate-tid-and-ms_addr-pdp-context.patch
+bnxt-apply-computed-clamp-value-for-coalece-parameter.patch
+ipv6-addrconf-only-check-invalid-header-values-when-netlink_f_strict_chk-is-set.patch
+net-phylink-fix-interface-passed-to-mac_link_up.patch
+net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch
+vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:15 +0800
+Subject: sit: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 4d42df46d6372ece4cb4279870b46c2ea7304a47 ]
+
+When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end,
+we should not call dst_confirm_neigh() as there is no two-way communication.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/sit.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/sit.c
++++ b/net/ipv6/sit.c
+@@ -944,7 +944,7 @@ static netdev_tx_t ipip6_tunnel_xmit(str
+ }
+
+ if (tunnel->parms.iph.daddr)
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->len > mtu && !skb_is_gso(skb)) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 13 Dec 2019 18:20:41 -0800
+Subject: tcp/dccp: fix possible race __inet_lookup_established()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 8dbd76e79a16b45b2ccb01d2f2e08dbf64e71e40 ]
+
+Michal Kubecek and Firo Yang did a very nice analysis of crashes
+happening in __inet_lookup_established().
+
+Since a TCP socket can go from TCP_ESTABLISH to TCP_LISTEN
+(via a close()/socket()/listen() cycle) without a RCU grace period,
+I should not have changed listeners linkage in their hash table.
+
+They must use the nulls protocol (Documentation/RCU/rculist_nulls.txt),
+so that a lookup can detect a socket in a hash list was moved in
+another one.
+
+Since we added code in commit d296ba60d8e2 ("soreuseport: Resolve
+merge conflict for v4/v6 ordering fix"), we have to add
+hlist_nulls_add_tail_rcu() helper.
+
+Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Michal Kubecek <mkubecek@suse.cz>
+Reported-by: Firo Yang <firo.yang@suse.com>
+Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
+Link: https://lore.kernel.org/netdev/20191120083919.GH27852@unicorn.suse.cz/
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/rculist_nulls.h | 37 +++++++++++++++++++++++++++++++++++++
+ include/net/inet_hashtables.h | 12 +++++++++---
+ include/net/sock.h | 5 +++++
+ net/ipv4/inet_diag.c | 3 ++-
+ net/ipv4/inet_hashtables.c | 16 ++++++++--------
+ net/ipv4/tcp_ipv4.c | 7 ++++---
+ 6 files changed, 65 insertions(+), 15 deletions(-)
+
+--- a/include/linux/rculist_nulls.h
++++ b/include/linux/rculist_nulls.h
+@@ -101,6 +101,43 @@ static inline void hlist_nulls_add_head_
+ }
+
+ /**
++ * hlist_nulls_add_tail_rcu
++ * @n: the element to add to the hash list.
++ * @h: the list to add to.
++ *
++ * Description:
++ * Adds the specified element to the specified hlist_nulls,
++ * while permitting racing traversals.
++ *
++ * The caller must take whatever precautions are necessary
++ * (such as holding appropriate locks) to avoid racing
++ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
++ * or hlist_nulls_del_rcu(), running on this same list.
++ * However, it is perfectly legal to run concurrently with
++ * the _rcu list-traversal primitives, such as
++ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
++ * problems on Alpha CPUs. Regardless of the type of CPU, the
++ * list-traversal primitive must be guarded by rcu_read_lock().
++ */
++static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
++ struct hlist_nulls_head *h)
++{
++ struct hlist_nulls_node *i, *last = NULL;
++
++ /* Note: write side code, so rcu accessors are not needed. */
++ for (i = h->first; !is_a_nulls(i); i = i->next)
++ last = i;
++
++ if (last) {
++ n->next = last->next;
++ n->pprev = &last->next;
++ rcu_assign_pointer(hlist_next_rcu(last), n);
++ } else {
++ hlist_nulls_add_head_rcu(n, h);
++ }
++}
++
++/**
+ * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
+--- a/include/net/inet_hashtables.h
++++ b/include/net/inet_hashtables.h
+@@ -103,13 +103,19 @@ struct inet_bind_hashbucket {
+ struct hlist_head chain;
+ };
+
+-/*
+- * Sockets can be hashed in established or listening table
++/* Sockets can be hashed in established or listening table.
++ * We must use different 'nulls' end-of-chain value for all hash buckets :
++ * A socket might transition from ESTABLISH to LISTEN state without
++ * RCU grace period. A lookup in ehash table needs to handle this case.
+ */
++#define LISTENING_NULLS_BASE (1U << 29)
+ struct inet_listen_hashbucket {
+ spinlock_t lock;
+ unsigned int count;
+- struct hlist_head head;
++ union {
++ struct hlist_head head;
++ struct hlist_nulls_head nulls_head;
++ };
+ };
+
+ /* This is for listening sockets, thus all sockets which possess wildcards. */
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -723,6 +723,11 @@ static inline void __sk_nulls_add_node_r
+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ }
+
++static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list)
++{
++ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
++}
++
+ static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
+ {
+ sock_hold(sk);
+--- a/net/ipv4/inet_diag.c
++++ b/net/ipv4/inet_diag.c
+@@ -914,11 +914,12 @@ void inet_diag_dump_icsk(struct inet_has
+
+ for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
+ struct inet_listen_hashbucket *ilb;
++ struct hlist_nulls_node *node;
+
+ num = 0;
+ ilb = &hashinfo->listening_hash[i];
+ spin_lock(&ilb->lock);
+- sk_for_each(sk, &ilb->head) {
++ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!net_eq(sock_net(sk), net))
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -516,10 +516,11 @@ static int inet_reuseport_add_sock(struc
+ struct inet_listen_hashbucket *ilb)
+ {
+ struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
++ const struct hlist_nulls_node *node;
+ struct sock *sk2;
+ kuid_t uid = sock_i_uid(sk);
+
+- sk_for_each_rcu(sk2, &ilb->head) {
++ sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
+ if (sk2 != sk &&
+ sk2->sk_family == sk->sk_family &&
+ ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
+@@ -555,9 +556,9 @@ int __inet_hash(struct sock *sk, struct
+ }
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+- hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
++ __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
+ else
+- hlist_add_head_rcu(&sk->sk_node, &ilb->head);
++ __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
+ inet_hash2(hashinfo, sk);
+ ilb->count++;
+ sock_set_flag(sk, SOCK_RCU_FREE);
+@@ -606,11 +607,9 @@ void inet_unhash(struct sock *sk)
+ reuseport_detach_sock(sk);
+ if (ilb) {
+ inet_unhash2(hashinfo, sk);
+- __sk_del_node_init(sk);
+- ilb->count--;
+- } else {
+- __sk_nulls_del_node_init_rcu(sk);
++ ilb->count--;
+ }
++ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ unlock:
+ spin_unlock_bh(lock);
+@@ -750,7 +749,8 @@ void inet_hashinfo_init(struct inet_hash
+
+ for (i = 0; i < INET_LHTABLE_SIZE; i++) {
+ spin_lock_init(&h->listening_hash[i].lock);
+- INIT_HLIST_HEAD(&h->listening_hash[i].head);
++ INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
++ i + LISTENING_NULLS_BASE);
+ h->listening_hash[i].count = 0;
+ }
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2149,13 +2149,14 @@ static void *listening_get_next(struct s
+ struct tcp_iter_state *st = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct inet_listen_hashbucket *ilb;
++ struct hlist_nulls_node *node;
+ struct sock *sk = cur;
+
+ if (!sk) {
+ get_head:
+ ilb = &tcp_hashinfo.listening_hash[st->bucket];
+ spin_lock(&ilb->lock);
+- sk = sk_head(&ilb->head);
++ sk = sk_nulls_head(&ilb->nulls_head);
+ st->offset = 0;
+ goto get_sk;
+ }
+@@ -2163,9 +2164,9 @@ get_head:
+ ++st->num;
+ ++st->offset;
+
+- sk = sk_next(sk);
++ sk = sk_nulls_next(sk);
+ get_sk:
+- sk_for_each_from(sk) {
++ sk_nulls_for_each_from(sk, node) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (sk->sk_family == afinfo->family)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 12 Dec 2019 12:55:29 -0800
+Subject: tcp: do not send empty skb from tcp_write_xmit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 1f85e6267caca44b30c54711652b0726fadbb131 ]
+
+Backport of commit fdfc5c8594c2 ("tcp: remove empty skb from
+write queue in error cases") in linux-4.14 stable triggered
+various bugs. One of them has been fixed in commit ba2ddb43f270
+("tcp: Don't dequeue SYN/FIN-segments from write-queue"), but
+we still have crashes on some occasions.
+
+Root-cause is that when tcp_sendmsg() has allocated a fresh
+skb and could not append a fragment before being blocked
+in sk_stream_wait_memory(), tcp_write_xmit() might be called
+and decide to send this fresh and empty skb.
+
+Sending an empty packet is not only silly, it might have caused
+many issues we had in the past with tp->packets_out being
+out of sync.
+
+Fixes: c65f7f00c587 ("[TCP]: Simplify SKB data portion allocation with NETIF_F_SG.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Christoph Paasch <cpaasch@apple.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2441,6 +2441,14 @@ static bool tcp_write_xmit(struct sock *
+ if (tcp_small_queue_check(sk, skb, 0))
+ break;
+
++ /* Argh, we hit an empty skb(), presumably a thread
++ * is sleeping in sendmsg()/sk_stream_wait_memory().
++ * We do not want to send a pure-ack packet and have
++ * a strange looking rtx queue with empty packet(s).
++ */
++ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
++ break;
++
+ if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
+ break;
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Cambda Zhu <cambda@linux.alibaba.com>
+Date: Fri, 27 Dec 2019 16:52:37 +0800
+Subject: tcp: Fix highest_sack and highest_sack_seq
+
+From: Cambda Zhu <cambda@linux.alibaba.com>
+
+[ Upstream commit 853697504de043ff0bfd815bd3a64de1dce73dc7 ]
+
+>From commit 50895b9de1d3 ("tcp: highest_sack fix"), the logic about
+setting tp->highest_sack to the head of the send queue was removed.
+Of course the logic is error prone, but it is logical. Before we
+remove the pointer to the highest sack skb and use the seq instead,
+we need to set tp->highest_sack to NULL when there is no skb after
+the last sack, and then replace NULL with the real skb when new skb
+inserted into the rtx queue, because the NULL means the highest sack
+seq is tp->snd_nxt. If tp->highest_sack is NULL and new data sent,
+the next ACK with sack option will increase tp->reordering unexpectedly.
+
+This patch sets tp->highest_sack to the tail of the rtx queue if
+it's NULL and new data is sent. The patch keeps the rule that the
+highest_sack can only be maintained by sack processing, except for
+this only case.
+
+Fixes: 50895b9de1d3 ("tcp: highest_sack fix")
+Signed-off-by: Cambda Zhu <cambda@linux.alibaba.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -72,6 +72,9 @@ static void tcp_event_new_data_sent(stru
+ __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
++ if (tp->highest_sack == NULL)
++ tp->highest_sack = skb;
++
+ tp->packets_out += tcp_skb_pcount(skb);
+ if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+ tcp_rearm_rto(sk);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:13 +0800
+Subject: tunnel: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 7a1592bcb15d71400a98632727791d1e68ea0ee8 ]
+
+When doing a tunnel PMTU update that calls __ip6_rt_update_pmtu() in the end,
+we should not call dst_confirm_neigh() as there is no two-way communication.
+
+v5: No Change.
+v4: Update commit description
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Fixes: 0dec879f636f ("net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP")
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Tested-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_tunnel.c | 2 +-
+ net/ipv6/ip6_tunnel.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -505,7 +505,7 @@ static int tnl_update_pmtu(struct net_de
+ mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+ if (skb_valid_dst(skb))
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ if (!skb_is_gso(skb) &&
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -640,7 +640,7 @@ ip4ip6_err(struct sk_buff *skb, struct i
+ if (rel_info > dst_mtu(skb_dst(skb2)))
+ goto out;
+
+- skb_dst_update_pmtu(skb2, rel_info);
++ skb_dst_update_pmtu_no_confirm(skb2, rel_info);
+ }
+
+ icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
+@@ -1132,7 +1132,7 @@ route_lookup:
+ mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
+ IPV6_MIN_MTU : IPV4_MIN_MTU);
+
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+ if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
+ *pmtu = mtu;
+ err = -EMSGSIZE;
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Antonio Messina <amessina@google.com>
+Date: Thu, 19 Dec 2019 15:08:03 +0100
+Subject: udp: fix integer overflow while computing available space in sk_rcvbuf
+
+From: Antonio Messina <amessina@google.com>
+
+[ Upstream commit feed8a4fc9d46c3126fb9fcae0e9248270c6321a ]
+
+When the size of the receive buffer for a socket is close to 2^31 when
+computing if we have enough space in the buffer to copy a packet from
+the queue to the buffer we might hit an integer overflow.
+
+When a user sets net.core.rmem_default to a value close to 2^31, UDP
+packets are dropped because of this overflow. This can be visible, for
+instance, with failure to resolve hostnames.
+
+This can be fixed by casting sk_rcvbuf (which is an int) to unsigned
+int, similarly to how it is done in TCP.
+
+Signed-off-by: Antonio Messina <amessina@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1475,7 +1475,7 @@ int __udp_enqueue_schedule_skb(struct so
+ * queue contains some other skb
+ */
+ rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
+- if (rmem > (size + sk->sk_rcvbuf))
++ if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
+ goto uncharge_drop;
+
+ spin_lock(&list->lock);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:32 PM CET
+From: Stefano Garzarella <sgarzare@redhat.com>
+Date: Fri, 6 Dec 2019 15:39:12 +0100
+Subject: vhost/vsock: accept only packets with the right dst_cid
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit 8a3cc29c316c17de590e3ff8b59f3d6cbfd37b0a ]
+
+When we receive a new packet from the guest, we check if the
+src_cid is correct, but we forgot to check the dst_cid.
+
+The host should accept only packets where dst_cid is
+equal to the host CID.
+
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/vsock.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -437,7 +437,9 @@ static void vhost_vsock_handle_tx_kick(s
+ virtio_transport_deliver_tap_pkt(pkt);
+
+ /* Only accept correctly addressed packets */
+- if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
++ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
++ le64_to_cpu(pkt->hdr.dst_cid) ==
++ vhost_transport_get_local_cid())
+ virtio_transport_recv_pkt(pkt);
+ else
+ virtio_transport_free_pkt(pkt);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:35:31 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:14 +0800
+Subject: vti: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 8247a79efa2f28b44329f363272550c1738377de ]
+
+When doing an IPv6 tunnel PMTU update that calls __ip6_rt_update_pmtu() in the
+end, we should not call dst_confirm_neigh() as there is no two-way communication.
+
+Although vti and vti6 are immune to this problem because they are IFF_NOARP
+interfaces, as Guillaume pointed out, there is still no sense in confirming
+the neighbour here.
+
+v5: Update commit description.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_vti.c | 2 +-
+ net/ipv6/ip6_vti.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_vti.c
++++ b/net/ipv4/ip_vti.c
+@@ -214,7 +214,7 @@ static netdev_tx_t vti_xmit(struct sk_bu
+
+ mtu = dst_mtu(dst);
+ if (skb->len > mtu) {
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+--- a/net/ipv6/ip6_vti.c
++++ b/net/ipv6/ip6_vti.c
+@@ -479,7 +479,7 @@ vti6_xmit(struct sk_buff *skb, struct ne
+
+ mtu = dst_mtu(dst);
+ if (skb->len > mtu) {
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)