git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.18-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 16 Oct 2018 05:08:16 +0000 (07:08 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 16 Oct 2018 05:08:16 +0000 (07:08 +0200)
added patches:
bnxt_en-don-t-try-to-offload-vlan-modify-action.patch
bnxt_en-fix-enables-field-in-hwrm_queue_cos2bw_cfg-request.patch
bnxt_en-fix-tx-timeout-during-netpoll.patch
bnxt_en-fix-vnic-reservations-on-the-pf.patch
bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch
bnxt_en-get-the-reduced-max_irqs-by-the-ones-used-by-rdma.patch
bonding-avoid-possible-dead-lock.patch
bonding-fix-warning-message.patch
bonding-pass-link-local-packets-to-bonding-master-also.patch
inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch
ip6_tunnel-be-careful-when-accessing-the-inner-header.patch
ip_tunnel-be-careful-when-accessing-the-inner-header.patch
ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch
ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch
net-aquantia-memory-corruption-on-jumbo-frames.patch
net-dsa-b53-keep-cpu-port-as-tagged-in-all-vlans.patch
net-dsa-bcm_sf2-call-setup-during-switch-resume.patch
net-dsa-bcm_sf2-fix-unbind-ordering.patch
net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch
net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch
net-ipv4-don-t-let-pmtu-updates-increase-route-mtu.patch
net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch
net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch
net-ipv6-remove-extra-call-to-ip6_convert_metrics-for-multipath-case.patch
net-ipv6-stop-leaking-percpu-memory-in-fib6-info.patch
net-mlx5-check-for-sq-and-not-rq-state-when-modifying-hairpin-sq.patch
net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch
net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch
net-mscc-fix-the-frame-extraction-into-the-skb.patch
net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch
net-mvpp2-fix-a-txq_done-race-condition.patch
net-packet-fix-packet-drop-as-of-virtio-gso.patch
net-phy-phylink-fix-sfp-interface-autodetection.patch
net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-receive-path.patch
net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-transmit.patch
net-qualcomm-rmnet-skip-processing-loopback-packets.patch
net-sched-add-policy-validation-for-tc-attributes.patch
net-sched-cls_u32-fix-hnode-refcounting.patch
net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch
net-stmmac-rework-coalesce-timer-and-fix-multi-queue-races.patch
net-systemport-fix-wake-up-interrupt-race-during-resume.patch
net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch
netlabel-check-for-ipv4mask-in-addrinfo_get.patch
nfp-avoid-soft-lockups-under-control-message-storm.patch
qed-fix-shmem-structure-inconsistency-between-driver-and-the-mfw.patch
qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch
qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch
r8169-fix-network-stalls-due-to-missing-bit-txcfg_auto_fifo.patch
r8169-set-rx_multi_en-bit-in-rxconfig-for-8168f-family-chips.patch
rtnetlink-fail-dump-if-target-netnsid-is-invalid.patch
rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch
rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch
sctp-update-dst-pmtu-with-the-correct-daddr.patch
sfp-fix-oops-with-ethtool-m.patch
tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch
team-forbid-enslaving-team-device-to-itself.patch
tipc-fix-flow-control-accounting-for-implicit-connect.patch
tun-initialize-napi_mutex-unconditionally.patch
tun-napi-flags-belong-to-tfile.patch
tun-remove-unused-parameters.patch
udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch
vxlan-fill-ttl-inherit-info.patch

63 files changed:
queue-4.18/bnxt_en-don-t-try-to-offload-vlan-modify-action.patch [new file with mode: 0644]
queue-4.18/bnxt_en-fix-enables-field-in-hwrm_queue_cos2bw_cfg-request.patch [new file with mode: 0644]
queue-4.18/bnxt_en-fix-tx-timeout-during-netpoll.patch [new file with mode: 0644]
queue-4.18/bnxt_en-fix-vnic-reservations-on-the-pf.patch [new file with mode: 0644]
queue-4.18/bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch [new file with mode: 0644]
queue-4.18/bnxt_en-get-the-reduced-max_irqs-by-the-ones-used-by-rdma.patch [new file with mode: 0644]
queue-4.18/bonding-avoid-possible-dead-lock.patch [new file with mode: 0644]
queue-4.18/bonding-fix-warning-message.patch [new file with mode: 0644]
queue-4.18/bonding-pass-link-local-packets-to-bonding-master-also.patch [new file with mode: 0644]
queue-4.18/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch [new file with mode: 0644]
queue-4.18/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch [new file with mode: 0644]
queue-4.18/ip_tunnel-be-careful-when-accessing-the-inner-header.patch [new file with mode: 0644]
queue-4.18/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch [new file with mode: 0644]
queue-4.18/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch [new file with mode: 0644]
queue-4.18/net-aquantia-memory-corruption-on-jumbo-frames.patch [new file with mode: 0644]
queue-4.18/net-dsa-b53-keep-cpu-port-as-tagged-in-all-vlans.patch [new file with mode: 0644]
queue-4.18/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch [new file with mode: 0644]
queue-4.18/net-dsa-bcm_sf2-fix-unbind-ordering.patch [new file with mode: 0644]
queue-4.18/net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch [new file with mode: 0644]
queue-4.18/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch [new file with mode: 0644]
queue-4.18/net-ipv4-don-t-let-pmtu-updates-increase-route-mtu.patch [new file with mode: 0644]
queue-4.18/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch [new file with mode: 0644]
queue-4.18/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch [new file with mode: 0644]
queue-4.18/net-ipv6-remove-extra-call-to-ip6_convert_metrics-for-multipath-case.patch [new file with mode: 0644]
queue-4.18/net-ipv6-stop-leaking-percpu-memory-in-fib6-info.patch [new file with mode: 0644]
queue-4.18/net-mlx5-check-for-sq-and-not-rq-state-when-modifying-hairpin-sq.patch [new file with mode: 0644]
queue-4.18/net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch [new file with mode: 0644]
queue-4.18/net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch [new file with mode: 0644]
queue-4.18/net-mscc-fix-the-frame-extraction-into-the-skb.patch [new file with mode: 0644]
queue-4.18/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch [new file with mode: 0644]
queue-4.18/net-mvpp2-fix-a-txq_done-race-condition.patch [new file with mode: 0644]
queue-4.18/net-packet-fix-packet-drop-as-of-virtio-gso.patch [new file with mode: 0644]
queue-4.18/net-phy-phylink-fix-sfp-interface-autodetection.patch [new file with mode: 0644]
queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-receive-path.patch [new file with mode: 0644]
queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-transmit.patch [new file with mode: 0644]
queue-4.18/net-qualcomm-rmnet-skip-processing-loopback-packets.patch [new file with mode: 0644]
queue-4.18/net-sched-add-policy-validation-for-tc-attributes.patch [new file with mode: 0644]
queue-4.18/net-sched-cls_u32-fix-hnode-refcounting.patch [new file with mode: 0644]
queue-4.18/net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch [new file with mode: 0644]
queue-4.18/net-stmmac-rework-coalesce-timer-and-fix-multi-queue-races.patch [new file with mode: 0644]
queue-4.18/net-systemport-fix-wake-up-interrupt-race-during-resume.patch [new file with mode: 0644]
queue-4.18/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch [new file with mode: 0644]
queue-4.18/netlabel-check-for-ipv4mask-in-addrinfo_get.patch [new file with mode: 0644]
queue-4.18/nfp-avoid-soft-lockups-under-control-message-storm.patch [new file with mode: 0644]
queue-4.18/qed-fix-shmem-structure-inconsistency-between-driver-and-the-mfw.patch [new file with mode: 0644]
queue-4.18/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch [new file with mode: 0644]
queue-4.18/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch [new file with mode: 0644]
queue-4.18/r8169-fix-network-stalls-due-to-missing-bit-txcfg_auto_fifo.patch [new file with mode: 0644]
queue-4.18/r8169-set-rx_multi_en-bit-in-rxconfig-for-8168f-family-chips.patch [new file with mode: 0644]
queue-4.18/rtnetlink-fail-dump-if-target-netnsid-is-invalid.patch [new file with mode: 0644]
queue-4.18/rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch [new file with mode: 0644]
queue-4.18/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch [new file with mode: 0644]
queue-4.18/sctp-update-dst-pmtu-with-the-correct-daddr.patch [new file with mode: 0644]
queue-4.18/series [new file with mode: 0644]
queue-4.18/sfp-fix-oops-with-ethtool-m.patch [new file with mode: 0644]
queue-4.18/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch [new file with mode: 0644]
queue-4.18/team-forbid-enslaving-team-device-to-itself.patch [new file with mode: 0644]
queue-4.18/tipc-fix-flow-control-accounting-for-implicit-connect.patch [new file with mode: 0644]
queue-4.18/tun-initialize-napi_mutex-unconditionally.patch [new file with mode: 0644]
queue-4.18/tun-napi-flags-belong-to-tfile.patch [new file with mode: 0644]
queue-4.18/tun-remove-unused-parameters.patch [new file with mode: 0644]
queue-4.18/udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch [new file with mode: 0644]
queue-4.18/vxlan-fill-ttl-inherit-info.patch [new file with mode: 0644]

diff --git a/queue-4.18/bnxt_en-don-t-try-to-offload-vlan-modify-action.patch b/queue-4.18/bnxt_en-don-t-try-to-offload-vlan-modify-action.patch
new file mode 100644 (file)
index 0000000..74a3807
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Wed, 19 Sep 2018 19:01:37 +0200
+Subject: bnxt_en: don't try to offload VLAN 'modify' action
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit 8c6ec3613e7b0aade20a3196169c0bab32ed3e3f ]
+
+The bnxt offload code currently supports only the 'push' and 'pop' operations: let
+.ndo_setup_tc() return -EOPNOTSUPP if a VLAN 'modify' action is configured.
+
+Fixes: 2ae7408fedfe ("bnxt_en: bnxt: add TC flower filter offload support")
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Acked-by: Sathya Perla <sathya.perla@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c |   20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+@@ -75,17 +75,23 @@ static int bnxt_tc_parse_redir(struct bn
+       return 0;
+ }
+-static void bnxt_tc_parse_vlan(struct bnxt *bp,
+-                             struct bnxt_tc_actions *actions,
+-                             const struct tc_action *tc_act)
++static int bnxt_tc_parse_vlan(struct bnxt *bp,
++                            struct bnxt_tc_actions *actions,
++                            const struct tc_action *tc_act)
+ {
+-      if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_POP) {
++      switch (tcf_vlan_action(tc_act)) {
++      case TCA_VLAN_ACT_POP:
+               actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
+-      } else if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_PUSH) {
++              break;
++      case TCA_VLAN_ACT_PUSH:
+               actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
+               actions->push_vlan_tci = htons(tcf_vlan_push_vid(tc_act));
+               actions->push_vlan_tpid = tcf_vlan_push_proto(tc_act);
++              break;
++      default:
++              return -EOPNOTSUPP;
+       }
++      return 0;
+ }
+ static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
+@@ -136,7 +142,9 @@ static int bnxt_tc_parse_actions(struct
+               /* Push/pop VLAN */
+               if (is_tcf_vlan(tc_act)) {
+-                      bnxt_tc_parse_vlan(bp, actions, tc_act);
++                      rc = bnxt_tc_parse_vlan(bp, actions, tc_act);
++                      if (rc)
++                              return rc;
+                       continue;
+               }
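The shape of the fix above is a parser that either records a supported action or tells the caller to stop offloading the rule. As a rough illustration of that accept/reject pattern, here is a minimal user-space C sketch; the enum, flag values and function names are invented for this example and are not the bnxt_en symbols.

#include <errno.h>
#include <stdio.h>

enum vlan_act { VLAN_ACT_POP, VLAN_ACT_PUSH, VLAN_ACT_MODIFY };

/* Record supported actions in *flags; anything else is refused with
 * -EOPNOTSUPP so the caller knows the rule cannot be offloaded. */
static int parse_vlan_action(enum vlan_act act, unsigned int *flags)
{
        switch (act) {
        case VLAN_ACT_POP:
                *flags |= 0x1;          /* "pop VLAN" offload flag */
                return 0;
        case VLAN_ACT_PUSH:
                *flags |= 0x2;          /* "push VLAN" offload flag */
                return 0;
        default:
                return -EOPNOTSUPP;     /* e.g. VLAN "modify" */
        }
}

int main(void)
{
        unsigned int flags = 0;

        printf("push   -> %d\n", parse_vlan_action(VLAN_ACT_PUSH, &flags));
        printf("modify -> %d\n", parse_vlan_action(VLAN_ACT_MODIFY, &flags));
        return 0;
}

Returning an error instead of silently accepting the unsupported action is what lets the caller stop the offload attempt cleanly, which is the whole point of turning the void parser into one that can fail.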
diff --git a/queue-4.18/bnxt_en-fix-enables-field-in-hwrm_queue_cos2bw_cfg-request.patch b/queue-4.18/bnxt_en-fix-enables-field-in-hwrm_queue_cos2bw_cfg-request.patch
new file mode 100644 (file)
index 0000000..7fdc334
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Fri, 5 Oct 2018 00:26:01 -0400
+Subject: bnxt_en: Fix enables field in HWRM_QUEUE_COS2BW_CFG request
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit 5db0e0969af6501ad45fe0494039d3b9c797822b ]
+
+In the HWRM_QUEUE_COS2BW_CFG request, the enables field should have bits
+set only for the queue ids that have valid parameters.
+
+This causes the firmware to return an error when the TC to hardware CoS queue
+mapping is not 1:1 during DCBNL ETS setup.
+
+Fixes: 2e8ef77ee0ff ("bnxt_en: Add TC to hardware QoS queue mapping logic.")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+@@ -98,13 +98,13 @@ static int bnxt_hwrm_queue_cos2bw_cfg(st
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1);
+       for (i = 0; i < max_tc; i++) {
+-              u8 qidx;
++              u8 qidx = bp->tc_to_qidx[i];
+               req.enables |= cpu_to_le32(
+-                      QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i);
++                      QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID <<
++                      qidx);
+               memset(&cos2bw, 0, sizeof(cos2bw));
+-              qidx = bp->tc_to_qidx[i];
+               cos2bw.queue_id = bp->q_info[qidx].queue_id;
+               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) {
+                       cos2bw.tsa =
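The essence of the fix above is that the request's enables bitmap must be indexed by the hardware CoS queue id each TC maps to, not by the TC index itself. Below is a small stand-alone sketch of building such a bitmap through a mapping table; the constant name, bit layout and mapping values are made up for illustration.

#include <stdint.h>
#include <stdio.h>

#define QUEUE_ID0_VALID 0x1u    /* bit for hardware queue 0; queue N uses bit << N */

int main(void)
{
        /* TC -> hardware CoS queue mapping; deliberately not 1:1. */
        const uint8_t tc_to_qidx[] = { 0, 2, 5 };
        uint32_t enables = 0;

        for (size_t i = 0; i < sizeof(tc_to_qidx); i++) {
                uint8_t qidx = tc_to_qidx[i];

                /* Set the valid bit for the mapped queue, not for TC index i. */
                enables |= QUEUE_ID0_VALID << qidx;
        }

        printf("enables = 0x%x\n", (unsigned)enables);  /* 0x25: bits 0, 2 and 5 */
        return 0;
}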
diff --git a/queue-4.18/bnxt_en-fix-tx-timeout-during-netpoll.patch b/queue-4.18/bnxt_en-fix-tx-timeout-during-netpoll.patch
new file mode 100644 (file)
index 0000000..04188ea
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Michael Chan <michael.chan@broadcom.com>
+Date: Wed, 26 Sep 2018 00:41:04 -0400
+Subject: bnxt_en: Fix TX timeout during netpoll.
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 73f21c653f930f438d53eed29b5e4c65c8a0f906 ]
+
+The current netpoll implementation in the bnxt_en driver has problems
+that can cause it to miss TX completion events.  bnxt_poll_work() in effect
+handles at most 1 TX packet before exiting.  In addition,
+there may be in-flight TX completions that ->poll() may miss even
+after we fix bnxt_poll_work() to handle all visible TX completions.
+netpoll may not call ->poll() again and HW may not generate IRQ
+because the driver does not ARM the IRQ when the budget (0 for netpoll)
+is reached.
+
+We fix it by handling all TX completions and by always arming the IRQ
+when we exit ->poll() with a budget of 0.
+
+Also, the logic to ACK the completion ring in case it is almost filled
+with TX completions needs to be adjusted to take care of the 0 budget
+case, as discussed with Eric Dumazet <edumazet@google.com>.
+
+Reported-by: Song Liu <songliubraving@fb.com>
+Reviewed-by: Song Liu <songliubraving@fb.com>
+Tested-by: Song Liu <songliubraving@fb.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -1882,8 +1882,11 @@ static int bnxt_poll_work(struct bnxt *b
+               if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) {
+                       tx_pkts++;
+                       /* return full budget so NAPI will complete. */
+-                      if (unlikely(tx_pkts > bp->tx_wake_thresh))
++                      if (unlikely(tx_pkts > bp->tx_wake_thresh)) {
+                               rx_pkts = budget;
++                              raw_cons = NEXT_RAW_CMP(raw_cons);
++                              break;
++                      }
+               } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
+                       if (likely(budget))
+                               rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+@@ -1911,7 +1914,7 @@ static int bnxt_poll_work(struct bnxt *b
+               }
+               raw_cons = NEXT_RAW_CMP(raw_cons);
+-              if (rx_pkts == budget)
++              if (rx_pkts && rx_pkts == budget)
+                       break;
+       }
+@@ -2025,8 +2028,12 @@ static int bnxt_poll(struct napi_struct
+       while (1) {
+               work_done += bnxt_poll_work(bp, bnapi, budget - work_done);
+-              if (work_done >= budget)
++              if (work_done >= budget) {
++                      if (!budget)
++                              BNXT_CP_DB_REARM(cpr->cp_doorbell,
++                                               cpr->cp_raw_cons);
+                       break;
++              }
+               if (!bnxt_has_work(bp, cpr)) {
+                       if (napi_complete_done(napi, work_done))
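The budget handling is the subtle part of this patch: with a budget of 0 (netpoll), only TX completions may be processed, and the interrupt must be re-armed on exit because nobody else will poll again. The following user-space simulation sketches that logic under those assumptions; the structure and field names are invented and only model the behaviour described in the changelog.

#include <stdbool.h>
#include <stdio.h>

struct ring { int pending_tx; int pending_rx; bool irq_armed; };

/* Model of the per-ring poll: RX work is bounded by budget, TX work is not. */
static int poll_once(struct ring *r, int budget)
{
        int rx_done = 0;

        while (r->pending_tx > 0 || (budget && r->pending_rx > 0 && rx_done < budget)) {
                if (r->pending_tx > 0) {
                        r->pending_tx--;                /* TX completions: always drained */
                } else {
                        r->pending_rx--;
                        rx_done++;
                }
                if (rx_done && rx_done == budget)       /* only a non-zero budget caps RX */
                        break;
        }

        if (budget == 0 || (r->pending_tx == 0 && r->pending_rx == 0))
                r->irq_armed = true;    /* a budget-0 (netpoll) call must re-arm before returning */

        return rx_done;
}

int main(void)
{
        struct ring r = { .pending_tx = 7, .pending_rx = 4, .irq_armed = false };

        poll_once(&r, 0);       /* netpoll-style call */
        printf("tx left=%d rx left=%d irq_armed=%d\n",
               r.pending_tx, r.pending_rx, r.irq_armed);
        return 0;
}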
diff --git a/queue-4.18/bnxt_en-fix-vnic-reservations-on-the-pf.patch b/queue-4.18/bnxt_en-fix-vnic-reservations-on-the-pf.patch
new file mode 100644 (file)
index 0000000..d5eaaa0
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Michael Chan <michael.chan@broadcom.com>
+Date: Fri, 5 Oct 2018 00:26:00 -0400
+Subject: bnxt_en: Fix VNIC reservations on the PF.
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit dbe80d446c859873820eedfff4abc61c71f1927b ]
+
+The enables bit for VNICs was set incorrectly when making the HWRM_FUNC_CFG
+firmware call to reserve VNICs.  This has the effect that the firmware
+will keep a large number of VNICs for the PF and leave very few for the
+VFs.  A DPDK driver running on the VFs, which requires more VNICs, may not
+work properly as a result.
+
+Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.")
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -4651,7 +4651,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt
+                                     FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+               enables |= ring_grps ?
+                          FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
+-              enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
++              enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0;
+               req->num_rx_rings = cpu_to_le16(rx_rings);
+               req->num_hw_ring_grps = cpu_to_le16(ring_grps);
diff --git a/queue-4.18/bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch b/queue-4.18/bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch
new file mode 100644 (file)
index 0000000..1f1f1fa
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
+Date: Fri, 5 Oct 2018 00:26:02 -0400
+Subject: bnxt_en: free hwrm resources, if driver probe fails.
+
+From: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
+
+[ Upstream commit a2bf74f4e1b82395dad2b08d2a911d9151db71c1 ]
+
+When the driver probe fails, all the resources that were allocated prior
+to the failure must be freed. However, the HWRM DMA response memory is
+not freed.
+
+This patch fixes the problem described above.
+
+Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.")
+Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -3015,10 +3015,11 @@ static void bnxt_free_hwrm_resources(str
+ {
+       struct pci_dev *pdev = bp->pdev;
+-      dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr,
+-                        bp->hwrm_cmd_resp_dma_addr);
+-
+-      bp->hwrm_cmd_resp_addr = NULL;
++      if (bp->hwrm_cmd_resp_addr) {
++              dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr,
++                                bp->hwrm_cmd_resp_dma_addr);
++              bp->hwrm_cmd_resp_addr = NULL;
++      }
+       if (bp->hwrm_dbg_resp_addr) {
+               dma_free_coherent(&pdev->dev, HWRM_DBG_REG_BUF_SIZE,
+                                 bp->hwrm_dbg_resp_addr,
+@@ -8931,6 +8932,7 @@ init_err_cleanup_tc:
+       bnxt_clear_int_mode(bp);
+ init_err_pci_clean:
++      bnxt_free_hwrm_resources(bp);
+       bnxt_cleanup_pci(bp);
+ init_err_free:
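More generally, the patch relies on two conventions that make late probe failures safe to unwind: the error paths fall through goto labels that release everything allocated so far, and the free helper checks its pointer so it can be reached from both the error path and normal teardown. Here is a generic sketch of that structure, with invented resource names and plain malloc()/free() standing in for the DMA allocations.

#include <stdio.h>
#include <stdlib.h>

struct dev { void *resp_buf; void *rings; };

/* Idempotent: safe to call from both the error path and regular teardown. */
static void free_resp_buf(struct dev *d)
{
        if (d->resp_buf) {
                free(d->resp_buf);
                d->resp_buf = NULL;
        }
}

static int probe(struct dev *d)
{
        d->resp_buf = malloc(4096);
        if (!d->resp_buf)
                return -1;

        d->rings = malloc(1024);
        if (!d->rings)
                goto err_free_resp;     /* later failures unwind earlier allocations */

        return 0;

err_free_resp:
        free_resp_buf(d);
        return -1;
}

int main(void)
{
        struct dev d = { 0 };

        printf("probe: %d\n", probe(&d));
        free(d.rings);
        free_resp_buf(&d);      /* second call is a no-op thanks to the NULL check */
        return 0;
}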
diff --git a/queue-4.18/bnxt_en-get-the-reduced-max_irqs-by-the-ones-used-by-rdma.patch b/queue-4.18/bnxt_en-get-the-reduced-max_irqs-by-the-ones-used-by-rdma.patch
new file mode 100644 (file)
index 0000000..d53cf3c
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Date: Fri, 5 Oct 2018 00:26:03 -0400
+Subject: bnxt_en: get the reduced max_irqs by the ones used by RDMA
+
+From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+
+[ Upstream commit c78fe058879bdea919d44f23e21da26f603e9166 ]
+
+When getting the maximum number of supported rings, reduce max_irqs
+by the number of IRQs used by RDMA.
+
+If the number of MSIX vectors is the limiting factor, this bug may cause the
+max ring count to be higher than it should be when the RDMA driver is
+loaded, and may result in ring allocation failures.
+
+Fixes: 30f529473ec9 ("bnxt_en: Do not modify max IRQ count after RDMA driver requests/frees IRQs.")
+Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -8501,7 +8501,7 @@ static void _bnxt_get_max_rings(struct b
+       *max_tx = hw_resc->max_tx_rings;
+       *max_rx = hw_resc->max_rx_rings;
+       *max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp),
+-                      hw_resc->max_irqs);
++                      hw_resc->max_irqs - bnxt_get_ulp_msix_num(bp));
+       *max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs);
+       max_ring_grps = hw_resc->max_hw_ring_grps;
+       if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) {
diff --git a/queue-4.18/bonding-avoid-possible-dead-lock.patch b/queue-4.18/bonding-avoid-possible-dead-lock.patch
new file mode 100644 (file)
index 0000000..dfd4c17
--- /dev/null
@@ -0,0 +1,244 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Mahesh Bandewar <maheshb@google.com>
+Date: Mon, 24 Sep 2018 14:40:11 -0700
+Subject: bonding: avoid possible dead-lock
+
+From: Mahesh Bandewar <maheshb@google.com>
+
+[ Upstream commit d4859d749aa7090ffb743d15648adb962a1baeae ]
+
+Syzkaller reported this on a slightly older kernel but it's still
+applicable to the current kernel -
+
+======================================================
+WARNING: possible circular locking dependency detected
+4.18.0-next-20180823+ #46 Not tainted
+------------------------------------------------------
+syz-executor4/26841 is trying to acquire lock:
+00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652
+
+but task is already holding lock:
+00000000768ab431 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline]
+00000000768ab431 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4708
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #2 (rtnl_mutex){+.+.}:
+       __mutex_lock_common kernel/locking/mutex.c:925 [inline]
+       __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073
+       mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088
+       rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77
+       bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline]
+       bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320
+       process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153
+       worker_thread+0x189/0x13c0 kernel/workqueue.c:2296
+       kthread+0x35a/0x420 kernel/kthread.c:246
+       ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415
+
+-> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}:
+       process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129
+       worker_thread+0x189/0x13c0 kernel/workqueue.c:2296
+       kthread+0x35a/0x420 kernel/kthread.c:246
+       ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415
+
+-> #0 ((wq_completion)bond_dev->name){+.+.}:
+       lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901
+       flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655
+       drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820
+       destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155
+       __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138
+       bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734
+       register_netdevice+0x337/0x1100 net/core/dev.c:8410
+       bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453
+       rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099
+       rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711
+       netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454
+       rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729
+       netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+       netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
+       netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908
+       sock_sendmsg_nosec net/socket.c:622 [inline]
+       sock_sendmsg+0xd5/0x120 net/socket.c:632
+       ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115
+       __sys_sendmsg+0x11d/0x290 net/socket.c:2153
+       __do_sys_sendmsg net/socket.c:2162 [inline]
+       __se_sys_sendmsg net/socket.c:2160 [inline]
+       __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160
+       do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
+       entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+other info that might help us debug this:
+
+Chain exists of:
+  (wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex
+
+ Possible unsafe locking scenario:
+
+       CPU0                    CPU1
+       ----                    ----
+  lock(rtnl_mutex);
+                               lock((work_completion)(&(&nnw->work)->work));
+                               lock(rtnl_mutex);
+  lock((wq_completion)bond_dev->name);
+
+ *** DEADLOCK ***
+
+1 lock held by syz-executor4/26841:
+
+stack backtrace:
+CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
+ print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222
+ check_prev_add kernel/locking/lockdep.c:1862 [inline]
+ check_prevs_add kernel/locking/lockdep.c:1975 [inline]
+ validate_chain kernel/locking/lockdep.c:2416 [inline]
+ __lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412
+ lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901
+ flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655
+ drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820
+ destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155
+ __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138
+ bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734
+ register_netdevice+0x337/0x1100 net/core/dev.c:8410
+ bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453
+ rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099
+ rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711
+ netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454
+ rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908
+ sock_sendmsg_nosec net/socket.c:622 [inline]
+ sock_sendmsg+0xd5/0x120 net/socket.c:632
+ ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115
+ __sys_sendmsg+0x11d/0x290 net/socket.c:2153
+ __do_sys_sendmsg net/socket.c:2162 [inline]
+ __se_sys_sendmsg net/socket.c:2160 [inline]
+ __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160
+ do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x457089
+Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089
+RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003
+RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
+R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001
+
+Signed-off-by: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |   43 +++++++++++++++-------------------------
+ include/net/bonding.h           |    7 ------
+ 2 files changed, 18 insertions(+), 32 deletions(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -210,6 +210,7 @@ static void bond_get_stats(struct net_de
+ static void bond_slave_arr_handler(struct work_struct *work);
+ static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
+                                 int mod);
++static void bond_netdev_notify_work(struct work_struct *work);
+ /*---------------------------- General routines -----------------------------*/
+@@ -1276,6 +1277,8 @@ static struct slave *bond_alloc_slave(st
+                       return NULL;
+               }
+       }
++      INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work);
++
+       return slave;
+ }
+@@ -1283,6 +1286,7 @@ static void bond_free_slave(struct slave
+ {
+       struct bonding *bond = bond_get_bond_by_slave(slave);
++      cancel_delayed_work_sync(&slave->notify_work);
+       if (BOND_MODE(bond) == BOND_MODE_8023AD)
+               kfree(SLAVE_AD_INFO(slave));
+@@ -1304,39 +1308,26 @@ static void bond_fill_ifslave(struct sla
+       info->link_failure_count = slave->link_failure_count;
+ }
+-static void bond_netdev_notify(struct net_device *dev,
+-                             struct netdev_bonding_info *info)
+-{
+-      rtnl_lock();
+-      netdev_bonding_info_change(dev, info);
+-      rtnl_unlock();
+-}
+-
+ static void bond_netdev_notify_work(struct work_struct *_work)
+ {
+-      struct netdev_notify_work *w =
+-              container_of(_work, struct netdev_notify_work, work.work);
++      struct slave *slave = container_of(_work, struct slave,
++                                         notify_work.work);
++
++      if (rtnl_trylock()) {
++              struct netdev_bonding_info binfo;
+-      bond_netdev_notify(w->dev, &w->bonding_info);
+-      dev_put(w->dev);
+-      kfree(w);
++              bond_fill_ifslave(slave, &binfo.slave);
++              bond_fill_ifbond(slave->bond, &binfo.master);
++              netdev_bonding_info_change(slave->dev, &binfo);
++              rtnl_unlock();
++      } else {
++              queue_delayed_work(slave->bond->wq, &slave->notify_work, 1);
++      }
+ }
+ void bond_queue_slave_event(struct slave *slave)
+ {
+-      struct bonding *bond = slave->bond;
+-      struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC);
+-
+-      if (!nnw)
+-              return;
+-
+-      dev_hold(slave->dev);
+-      nnw->dev = slave->dev;
+-      bond_fill_ifslave(slave, &nnw->bonding_info.slave);
+-      bond_fill_ifbond(bond, &nnw->bonding_info.master);
+-      INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work);
+-
+-      queue_delayed_work(slave->bond->wq, &nnw->work, 0);
++      queue_delayed_work(slave->bond->wq, &slave->notify_work, 0);
+ }
+ void bond_lower_state_changed(struct slave *slave)
+--- a/include/net/bonding.h
++++ b/include/net/bonding.h
+@@ -139,12 +139,6 @@ struct bond_parm_tbl {
+       int mode;
+ };
+-struct netdev_notify_work {
+-      struct delayed_work     work;
+-      struct net_device       *dev;
+-      struct netdev_bonding_info bonding_info;
+-};
+-
+ struct slave {
+       struct net_device *dev; /* first - useful for panic debug */
+       struct bonding *bond; /* our master */
+@@ -172,6 +166,7 @@ struct slave {
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+       struct netpoll *np;
+ #endif
++      struct delayed_work notify_work;
+       struct kobject kobj;
+       struct rtnl_link_stats64 slave_stats;
+ };
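The core of the fix is a change of discipline in the work handler: never block on the lock that a flusher of this workqueue might already hold; try it, and if it is busy, requeue the work instead. A compact pthread-based model of that trylock-or-requeue pattern is sketched below; the mutex merely stands in for rtnl_mutex and the return value for requeueing the delayed work.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;    /* stands in for rtnl_mutex */

/* Work handler: never blocks on the lock, so the workqueue it runs on can
 * always be flushed by a thread that already holds cfg_lock. */
static bool notify_work(void)
{
        if (pthread_mutex_trylock(&cfg_lock) != 0)
                return false;           /* busy: caller should requeue the work */

        printf("sending notification under the lock\n");
        pthread_mutex_unlock(&cfg_lock);
        return true;
}

int main(void)
{
        /* Simulate contention: the "rtnl holder" owns the lock right now. */
        pthread_mutex_lock(&cfg_lock);
        if (!notify_work())
                printf("lock busy, requeueing work for later\n");
        pthread_mutex_unlock(&cfg_lock);

        /* Lock is free again: the requeued work can now complete. */
        if (notify_work())
                printf("done\n");
        return 0;
}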
diff --git a/queue-4.18/bonding-fix-warning-message.patch b/queue-4.18/bonding-fix-warning-message.patch
new file mode 100644 (file)
index 0000000..eb5ad48
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Mahesh Bandewar <maheshb@google.com>
+Date: Tue, 2 Oct 2018 12:14:34 -0700
+Subject: bonding: fix warning message
+
+From: Mahesh Bandewar <maheshb@google.com>
+
+[ Upstream commit 0f3b914c9cfcd7bbedd445dc4ac5dd999fa213c2 ]
+
+The RX queue config of the bonding master can differ from that of its slave
+device(s). With commit 6a9e461f6fe4 ("bonding: pass link-local
+packets to bonding master also."), the packet is reinjected into the stack
+with skb->dev set to the bonding master. This potentially triggers the
+message:
+
+   "bondX received packet on queue Y, but number of RX queues is Z"
+
+whenever the queue the packet is received on is higher than the
+number of RX queues (numrxqueues) on the bonding master (Y > Z).
+
+Fixes: 6a9e461f6fe4 ("bonding: pass link-local packets to bonding master also.")
+Reported-by: John Sperbeck <jsperbeck@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -1194,6 +1194,7 @@ static rx_handler_result_t bond_handle_f
+               if (nskb) {
+                       nskb->dev = bond->dev;
++                      nskb->queue_mapping = 0;
+                       netif_rx(nskb);
+               }
+               return RX_HANDLER_PASS;
diff --git a/queue-4.18/bonding-pass-link-local-packets-to-bonding-master-also.patch b/queue-4.18/bonding-pass-link-local-packets-to-bonding-master-also.patch
new file mode 100644 (file)
index 0000000..4c09e69
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Mahesh Bandewar <maheshb@google.com>
+Date: Mon, 24 Sep 2018 14:39:42 -0700
+Subject: bonding: pass link-local packets to bonding master also.
+
+From: Mahesh Bandewar <maheshb@google.com>
+
+[ Upstream commit 6a9e461f6fe4434e6172304b69774daff9a3ac4c ]
+
+Commit b89f04c61efe ("bonding: deliver link-local packets with
+skb->dev set to link that packets arrived on") changed the behavior
+of how link-local multicast packets are processed. The change in
+behavior broke some legacy use cases where these packets are
+expected to arrive on the bonding master device as well.
+
+This patch passes the packet to the stack on the link it arrived
+on and also passes it to the bonding master device to preserve the
+legacy use case.
+
+Fixes: b89f04c61efe ("bonding: deliver link-local packets with skb->dev set to link that packets arrived on")
+Reported-by: Michal Soltys <soltys@ziu.info>
+Signed-off-by: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |   21 +++++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -1178,9 +1178,26 @@ static rx_handler_result_t bond_handle_f
+               }
+       }
+-      /* don't change skb->dev for link-local packets */
+-      if (is_link_local_ether_addr(eth_hdr(skb)->h_dest))
++      /* Link-local multicast packets should be passed to the
++       * stack on the link they arrive as well as pass them to the
++       * bond-master device. These packets are mostly usable when
++       * stack receives it with the link on which they arrive
++       * (e.g. LLDP) they also must be available on master. Some of
++       * the use cases include (but are not limited to): LLDP agents
++       * that must be able to operate both on enslaved interfaces as
++       * well as on bonds themselves; linux bridges that must be able
++       * to process/pass BPDUs from attached bonds when any kind of
++       * STP version is enabled on the network.
++       */
++      if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) {
++              struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
++
++              if (nskb) {
++                      nskb->dev = bond->dev;
++                      netif_rx(nskb);
++              }
+               return RX_HANDLER_PASS;
++      }
+       if (bond_should_deliver_exact_match(skb, slave, bond))
+               return RX_HANDLER_EXACT;
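Mechanically, the change duplicates the frame, hands the copy to the stack as if it arrived on the bond master, and still delivers the original on the slave link. A tiny sketch of that clone-and-redeliver idea using plain heap buffers instead of skbs (all names here are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct frame { const char *dev; char data[64]; };

static void deliver(const struct frame *f)
{
        printf("stack got '%s' on %s\n", f->data, f->dev);
}

/* Deliver link-local frames twice: once on the arriving slave and once,
 * via a copy, on the bond master device. */
static void rx_link_local(const struct frame *orig, const char *master)
{
        struct frame *copy = malloc(sizeof(*copy));

        if (copy) {                             /* cloning may fail; the original still flows */
                memcpy(copy, orig, sizeof(*copy));
                copy->dev = master;             /* the copy appears to come from the bond */
                deliver(copy);
                free(copy);
        }
        deliver(orig);                          /* original keeps its slave device */
}

int main(void)
{
        struct frame f = { .dev = "eth0", .data = "LLDP PDU" };

        rx_link_local(&f, "bond0");
        return 0;
}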
diff --git a/queue-4.18/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch b/queue-4.18/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch
new file mode 100644 (file)
index 0000000..c766138
--- /dev/null
@@ -0,0 +1,101 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 2 Oct 2018 12:35:05 -0700
+Subject: inet: make sure to grab rcu_read_lock before using ireq->ireq_opt
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 2ab2ddd301a22ca3c5f0b743593e4ad2953dfa53 ]
+
+Timer handlers do not imply rcu_read_lock(), so my recent fix
+triggered a LOCKDEP warning when a SYNACK is retransmitted.
+
+Let's add rcu_read_lock()/rcu_read_unlock() pairs around ireq->ireq_opt
+usages instead of guessing what is done by callers, since it is
+not worth the pain.
+
+Get rid of the ireq_opt_deref() helper since it hides the logic
+without real benefit, now that it is just a standard rcu_dereference().
+
+Fixes: 1ad98e9d1bdf ("tcp/dccp: fix lockdep issue when SYN is backlogged")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_sock.h         |    5 -----
+ net/dccp/ipv4.c                 |    4 +++-
+ net/ipv4/inet_connection_sock.c |    5 ++++-
+ net/ipv4/tcp_ipv4.c             |    4 +++-
+ 4 files changed, 10 insertions(+), 8 deletions(-)
+
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -130,11 +130,6 @@ static inline int inet_request_bound_dev
+       return sk->sk_bound_dev_if;
+ }
+-static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq)
+-{
+-      return rcu_dereference(ireq->ireq_opt);
+-}
+-
+ struct inet_cork {
+       unsigned int            flags;
+       __be32                  addr;
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -493,9 +493,11 @@ static int dccp_v4_send_response(const s
+               dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr,
+                                                             ireq->ir_rmt_addr);
++              rcu_read_lock();
+               err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+                                           ireq->ir_rmt_addr,
+-                                          ireq_opt_deref(ireq));
++                                          rcu_dereference(ireq->ireq_opt));
++              rcu_read_unlock();
+               err = net_xmit_eval(err);
+       }
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -535,7 +535,8 @@ struct dst_entry *inet_csk_route_req(con
+       struct ip_options_rcu *opt;
+       struct rtable *rt;
+-      opt = ireq_opt_deref(ireq);
++      rcu_read_lock();
++      opt = rcu_dereference(ireq->ireq_opt);
+       flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+@@ -549,11 +550,13 @@ struct dst_entry *inet_csk_route_req(con
+               goto no_route;
+       if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+               goto route_err;
++      rcu_read_unlock();
+       return &rt->dst;
+ route_err:
+       ip_rt_put(rt);
+ no_route:
++      rcu_read_unlock();
+       __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
+       return NULL;
+ }
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -942,9 +942,11 @@ static int tcp_v4_send_synack(const stru
+       if (skb) {
+               __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
++              rcu_read_lock();
+               err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+                                           ireq->ir_rmt_addr,
+-                                          ireq_opt_deref(ireq));
++                                          rcu_dereference(ireq->ireq_opt));
++              rcu_read_unlock();
+               err = net_xmit_eval(err);
+       }
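The discipline the patch enforces is that the function doing the rcu_dereference() takes the RCU read-side lock itself rather than assuming every caller (including timer handlers) already holds it. Assuming the userspace RCU library (liburcu) for the sake of a runnable sketch, the same pattern looks roughly like this; the data structure and function names are invented.

#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>               /* userspace RCU; link with -lurcu */

struct opts { int ttl; };

static struct opts *shared_opts;        /* RCU-protected pointer */

/* The reader takes the read-side lock itself: it no longer matters whether
 * the caller happens to hold it already. */
static int read_ttl(void)
{
        struct opts *o;
        int ttl = -1;

        rcu_read_lock();
        o = rcu_dereference(shared_opts);
        if (o)
                ttl = o->ttl;
        rcu_read_unlock();
        return ttl;
}

int main(void)
{
        struct opts *o = malloc(sizeof(*o));

        o->ttl = 64;
        rcu_register_thread();
        rcu_assign_pointer(shared_opts, o);

        printf("ttl = %d\n", read_ttl());

        rcu_assign_pointer(shared_opts, NULL);
        synchronize_rcu();      /* wait for readers before freeing */
        free(o);
        rcu_unregister_thread();
        return 0;
}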
diff --git a/queue-4.18/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch b/queue-4.18/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch
new file mode 100644 (file)
index 0000000..68f9499
--- /dev/null
@@ -0,0 +1,136 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Wed, 19 Sep 2018 15:02:07 +0200
+Subject: ip6_tunnel: be careful when accessing the inner header
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 76c0ddd8c3a683f6e2c6e60e11dc1a1558caf4bc ]
+
+The ip6 tunnel xmit ndo assumes that the processed skb always
+contains an ip[v6] header, but syzbot has found a way to send
+frames that fall short of this assumption, leading to the following splat:
+
+BUG: KMSAN: uninit-value in ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307
+[inline]
+BUG: KMSAN: uninit-value in ip6_tnl_start_xmit+0x7d2/0x1ef0
+net/ipv6/ip6_tunnel.c:1390
+CPU: 0 PID: 4504 Comm: syz-executor558 Not tainted 4.16.0+ #87
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+Google 01/01/2011
+Call Trace:
+  __dump_stack lib/dump_stack.c:17 [inline]
+  dump_stack+0x185/0x1d0 lib/dump_stack.c:53
+  kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067
+  __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683
+  ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline]
+  ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390
+  __netdev_start_xmit include/linux/netdevice.h:4066 [inline]
+  netdev_start_xmit include/linux/netdevice.h:4075 [inline]
+  xmit_one net/core/dev.c:3026 [inline]
+  dev_hard_start_xmit+0x5f1/0xc70 net/core/dev.c:3042
+  __dev_queue_xmit+0x27ee/0x3520 net/core/dev.c:3557
+  dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590
+  packet_snd net/packet/af_packet.c:2944 [inline]
+  packet_sendmsg+0x7c70/0x8a30 net/packet/af_packet.c:2969
+  sock_sendmsg_nosec net/socket.c:630 [inline]
+  sock_sendmsg net/socket.c:640 [inline]
+  ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046
+  __sys_sendmmsg+0x42d/0x800 net/socket.c:2136
+  SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167
+  SyS_sendmmsg+0x63/0x90 net/socket.c:2162
+  do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
+  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
+RIP: 0033:0x441819
+RSP: 002b:00007ffe58ee8268 EFLAGS: 00000213 ORIG_RAX: 0000000000000133
+RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000441819
+RDX: 0000000000000002 RSI: 0000000020000100 RDI: 0000000000000003
+RBP: 00000000006cd018 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000402510
+R13: 00000000004025a0 R14: 0000000000000000 R15: 0000000000000000
+
+Uninit was created at:
+  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
+  kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188
+  kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314
+  kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321
+  slab_post_alloc_hook mm/slab.h:445 [inline]
+  slab_alloc_node mm/slub.c:2737 [inline]
+  __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369
+  __kmalloc_reserve net/core/skbuff.c:138 [inline]
+  __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206
+  alloc_skb include/linux/skbuff.h:984 [inline]
+  alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234
+  sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085
+  packet_alloc_skb net/packet/af_packet.c:2803 [inline]
+  packet_snd net/packet/af_packet.c:2894 [inline]
+  packet_sendmsg+0x6454/0x8a30 net/packet/af_packet.c:2969
+  sock_sendmsg_nosec net/socket.c:630 [inline]
+  sock_sendmsg net/socket.c:640 [inline]
+  ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046
+  __sys_sendmmsg+0x42d/0x800 net/socket.c:2136
+  SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167
+  SyS_sendmmsg+0x63/0x90 net/socket.c:2162
+  do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
+  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
+
+This change addresses the issue by adding the needed check before
+accessing the inner header.
+
+The ipv4 side of the issue has apparently been there since the initial
+IPv4-over-IPv6 support, and the ipv6 side predates git history.
+
+Fixes: c4d3efafcc93 ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.")
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzbot+3fde91d4d394747d6db4@syzkaller.appspotmail.com
+Tested-by: Alexander Potapenko <glider@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_tunnel.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1226,7 +1226,7 @@ static inline int
+ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+       struct ip6_tnl *t = netdev_priv(dev);
+-      const struct iphdr  *iph = ip_hdr(skb);
++      const struct iphdr  *iph;
+       int encap_limit = -1;
+       struct flowi6 fl6;
+       __u8 dsfield;
+@@ -1234,6 +1234,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str
+       u8 tproto;
+       int err;
++      /* ensure we can access the full inner ip header */
++      if (!pskb_may_pull(skb, sizeof(struct iphdr)))
++              return -1;
++
++      iph = ip_hdr(skb);
+       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+       tproto = READ_ONCE(t->parms.proto);
+@@ -1297,7 +1302,7 @@ static inline int
+ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+       struct ip6_tnl *t = netdev_priv(dev);
+-      struct ipv6hdr *ipv6h = ipv6_hdr(skb);
++      struct ipv6hdr *ipv6h;
+       int encap_limit = -1;
+       __u16 offset;
+       struct flowi6 fl6;
+@@ -1306,6 +1311,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str
+       u8 tproto;
+       int err;
++      if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
++              return -1;
++
++      ipv6h = ipv6_hdr(skb);
+       tproto = READ_ONCE(t->parms.proto);
+       if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
+           ip6_tnl_addr_conflict(t, ipv6h))
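Stripped of the skb machinery, the bug class is reading a header struct out of a buffer before checking that the buffer is long enough, which is exactly what the added pskb_may_pull() calls prevent. A self-contained sketch of the check-then-read pattern over a plain byte buffer (struct layout and names are illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ip_hdr_min { uint8_t ver_ihl; uint8_t tos; uint16_t tot_len; };  /* illustrative */

/* Returns 0 and fills *out only if the buffer really contains a full header. */
static int pull_inner_header(const uint8_t *buf, size_t len, struct ip_hdr_min *out)
{
        if (len < sizeof(*out))
                return -1;      /* frame too short: refuse instead of reading garbage */

        memcpy(out, buf, sizeof(*out));
        return 0;
}

int main(void)
{
        uint8_t runt[2] = { 0x45, 0x00 };       /* shorter than any IP header */
        struct ip_hdr_min h;

        if (pull_inner_header(runt, sizeof(runt), &h))
                printf("dropped runt frame\n");
        return 0;
}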
diff --git a/queue-4.18/ip_tunnel-be-careful-when-accessing-the-inner-header.patch b/queue-4.18/ip_tunnel-be-careful-when-accessing-the-inner-header.patch
new file mode 100644 (file)
index 0000000..418fd67
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Mon, 24 Sep 2018 15:48:19 +0200
+Subject: ip_tunnel: be careful when accessing the inner header
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit ccfec9e5cb2d48df5a955b7bf47f7782157d3bc2 ]
+
+Cong noted that we need the same checks introduced by commit 76c0ddd8c3a6
+("ip6_tunnel: be careful when accessing the inner header")
+even for ipv4 tunnels.
+
+Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.")
+Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_tunnel.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -627,6 +627,7 @@ void ip_tunnel_xmit(struct sk_buff *skb,
+                   const struct iphdr *tnl_params, u8 protocol)
+ {
+       struct ip_tunnel *tunnel = netdev_priv(dev);
++      unsigned int inner_nhdr_len = 0;
+       const struct iphdr *inner_iph;
+       struct flowi4 fl4;
+       u8     tos, ttl;
+@@ -636,6 +637,14 @@ void ip_tunnel_xmit(struct sk_buff *skb,
+       __be32 dst;
+       bool connected;
++      /* ensure we can access the inner net header, for several users below */
++      if (skb->protocol == htons(ETH_P_IP))
++              inner_nhdr_len = sizeof(struct iphdr);
++      else if (skb->protocol == htons(ETH_P_IPV6))
++              inner_nhdr_len = sizeof(struct ipv6hdr);
++      if (unlikely(!pskb_may_pull(skb, inner_nhdr_len)))
++              goto tx_error;
++
+       inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+       connected = (tunnel->parms.iph.daddr != 0);
diff --git a/queue-4.18/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch b/queue-4.18/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch
new file mode 100644 (file)
index 0000000..958f660
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 30 Sep 2018 11:33:39 -0700
+Subject: ipv4: fix use-after-free in ip_cmsg_recv_dstaddr()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 64199fc0a46ba211362472f7f942f900af9492fd ]
+
+Caching ip_hdr(skb) before a call to pskb_may_pull() is buggy;
+do not do it.
+
+Fixes: 2efd4fca703a ("ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_sockglue.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -149,7 +149,6 @@ static void ip_cmsg_recv_security(struct
+ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
+ {
+       struct sockaddr_in sin;
+-      const struct iphdr *iph = ip_hdr(skb);
+       __be16 *ports;
+       int end;
+@@ -164,7 +163,7 @@ static void ip_cmsg_recv_dstaddr(struct
+       ports = (__be16 *)skb_transport_header(skb);
+       sin.sin_family = AF_INET;
+-      sin.sin_addr.s_addr = iph->daddr;
++      sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
+       sin.sin_port = ports[1];
+       memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
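The underlying rule, which applies well beyond skbs, is not to hold a pointer into a buffer across a call that may reallocate or relink that buffer; pskb_may_pull() is such a call. Below is a user-space sketch in which a realloc()-backed helper plays that role; taking the pointer only after the call is the fix, and every name here is invented for the example.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pkt { char *head; size_t len; };

/* May enlarge (and therefore move) the packet's storage, much like
 * pskb_may_pull() can relocate an skb's header area. */
static int may_pull(struct pkt *p, size_t need)
{
        char *bigger;

        if (p->len >= need)
                return 0;
        bigger = realloc(p->head, need);
        if (!bigger)
                return -1;
        memset(bigger + p->len, 0, need - p->len);
        p->head = bigger;
        p->len = need;
        return 0;
}

int main(void)
{
        struct pkt p = { .head = malloc(4), .len = 4 };

        memcpy(p.head, "addr", 4);

        /* The buggy order would be: cache `char *hdr = p.head;` here, call
         * may_pull(), then keep using the stale hdr.  Instead, pull first
         * and only then take the pointer. */
        if (may_pull(&p, 64) == 0) {
                char *hdr = p.head;     /* read *after* the call that may move it */
                printf("%.4s\n", hdr);
        }

        free(p.head);
        return 0;
}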
diff --git a/queue-4.18/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch b/queue-4.18/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch
new file mode 100644 (file)
index 0000000..fa9c219
--- /dev/null
@@ -0,0 +1,169 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Wei Wang <weiwan@google.com>
+Date: Thu, 4 Oct 2018 10:12:37 -0700
+Subject: ipv6: take rcu lock in rawv6_send_hdrinc()
+
+From: Wei Wang <weiwan@google.com>
+
+[ Upstream commit a688caa34beb2fd2a92f1b6d33e40cde433ba160 ]
+
+In rawv6_send_hdrinc(), in order to avoid an extra dst_hold(), we
+directly assign the dst to the skb and set the passed-in dst to NULL to
+avoid a double free.
+However, in the error case, we free the skb and then do a stats update with
+the dst pointer passed in. This causes a use-after-free on the dst.
+Fix it by taking the rcu read lock right before the dst could get released,
+to make sure the dst does not get freed until the stats update is done.
+Note: we don't have this issue in ipv4 because the dst is not used for stats
+updates in v4.
+
+Syzkaller reported following crash:
+BUG: KASAN: use-after-free in rawv6_send_hdrinc net/ipv6/raw.c:692 [inline]
+BUG: KASAN: use-after-free in rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921
+Read of size 8 at addr ffff8801d95ba730 by task syz-executor0/32088
+
+CPU: 1 PID: 32088 Comm: syz-executor0 Not tainted 4.19.0-rc2+ #93
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113
+ print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256
+ kasan_report_error mm/kasan/report.c:354 [inline]
+ kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412
+ __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
+ rawv6_send_hdrinc net/ipv6/raw.c:692 [inline]
+ rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921
+ inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
+ sock_sendmsg_nosec net/socket.c:621 [inline]
+ sock_sendmsg+0xd5/0x120 net/socket.c:631
+ ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114
+ __sys_sendmsg+0x11d/0x280 net/socket.c:2152
+ __do_sys_sendmsg net/socket.c:2161 [inline]
+ __se_sys_sendmsg net/socket.c:2159 [inline]
+ __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159
+ do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x457099
+Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007f83756edc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00007f83756ee6d4 RCX: 0000000000457099
+RDX: 0000000000000000 RSI: 0000000020003840 RDI: 0000000000000004
+RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
+R13: 00000000004d4b30 R14: 00000000004c90b1 R15: 0000000000000000
+
+Allocated by task 32088:
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:448
+ set_track mm/kasan/kasan.c:460 [inline]
+ kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553
+ kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490
+ kmem_cache_alloc+0x12e/0x730 mm/slab.c:3554
+ dst_alloc+0xbb/0x1d0 net/core/dst.c:105
+ ip6_dst_alloc+0x35/0xa0 net/ipv6/route.c:353
+ ip6_rt_cache_alloc+0x247/0x7b0 net/ipv6/route.c:1186
+ ip6_pol_route+0x8f8/0xd90 net/ipv6/route.c:1895
+ ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2093
+ fib6_rule_lookup+0x277/0x860 net/ipv6/fib6_rules.c:122
+ ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2121
+ ip6_route_output include/net/ip6_route.h:88 [inline]
+ ip6_dst_lookup_tail+0xe27/0x1d60 net/ipv6/ip6_output.c:951
+ ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079
+ rawv6_sendmsg+0x12d9/0x4630 net/ipv6/raw.c:905
+ inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
+ sock_sendmsg_nosec net/socket.c:621 [inline]
+ sock_sendmsg+0xd5/0x120 net/socket.c:631
+ ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114
+ __sys_sendmsg+0x11d/0x280 net/socket.c:2152
+ __do_sys_sendmsg net/socket.c:2161 [inline]
+ __se_sys_sendmsg net/socket.c:2159 [inline]
+ __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159
+ do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+Freed by task 5356:
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:448
+ set_track mm/kasan/kasan.c:460 [inline]
+ __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521
+ kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
+ __cache_free mm/slab.c:3498 [inline]
+ kmem_cache_free+0x83/0x290 mm/slab.c:3756
+ dst_destroy+0x267/0x3c0 net/core/dst.c:141
+ dst_destroy_rcu+0x16/0x19 net/core/dst.c:154
+ __rcu_reclaim kernel/rcu/rcu.h:236 [inline]
+ rcu_do_batch kernel/rcu/tree.c:2576 [inline]
+ invoke_rcu_callbacks kernel/rcu/tree.c:2880 [inline]
+ __rcu_process_callbacks kernel/rcu/tree.c:2847 [inline]
+ rcu_process_callbacks+0xf23/0x2670 kernel/rcu/tree.c:2864
+ __do_softirq+0x30b/0xad8 kernel/softirq.c:292
+
+Fixes: 1789a640f556 ("raw: avoid two atomics in xmit")
+Signed-off-by: Wei Wang <weiwan@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/raw.c |   29 ++++++++++++++++++++---------
+ 1 file changed, 20 insertions(+), 9 deletions(-)
+
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -650,8 +650,6 @@ static int rawv6_send_hdrinc(struct sock
+       skb->protocol = htons(ETH_P_IPV6);
+       skb->priority = sk->sk_priority;
+       skb->mark = sk->sk_mark;
+-      skb_dst_set(skb, &rt->dst);
+-      *dstp = NULL;
+       skb_put(skb, length);
+       skb_reset_network_header(skb);
+@@ -664,8 +662,14 @@ static int rawv6_send_hdrinc(struct sock
+       skb->transport_header = skb->network_header;
+       err = memcpy_from_msg(iph, msg, length);
+-      if (err)
+-              goto error_fault;
++      if (err) {
++              err = -EFAULT;
++              kfree_skb(skb);
++              goto error;
++      }
++
++      skb_dst_set(skb, &rt->dst);
++      *dstp = NULL;
+       /* if egress device is enslaved to an L3 master device pass the
+        * skb to its handler for processing
+@@ -674,21 +678,28 @@ static int rawv6_send_hdrinc(struct sock
+       if (unlikely(!skb))
+               return 0;
++      /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev
++       * in the error path. Since skb has been freed, the dst could
++       * have been queued for deletion.
++       */
++      rcu_read_lock();
+       IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+       err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
+                     NULL, rt->dst.dev, dst_output);
+       if (err > 0)
+               err = net_xmit_errno(err);
+-      if (err)
+-              goto error;
++      if (err) {
++              IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
++              rcu_read_unlock();
++              goto error_check;
++      }
++      rcu_read_unlock();
+ out:
+       return 0;
+-error_fault:
+-      err = -EFAULT;
+-      kfree_skb(skb);
+ error:
+       IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
++error_check:
+       if (err == -ENOBUFS && !np->recverr)
+               err = 0;
+       return err;
diff --git a/queue-4.18/net-aquantia-memory-corruption-on-jumbo-frames.patch b/queue-4.18/net-aquantia-memory-corruption-on-jumbo-frames.patch
new file mode 100644 (file)
index 0000000..b61ebc6
--- /dev/null
@@ -0,0 +1,89 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Friedemann Gerold <f.gerold@b-c-s.de>
+Date: Sat, 15 Sep 2018 18:03:39 +0300
+Subject: net: aquantia: memory corruption on jumbo frames
+
+From: Friedemann Gerold <f.gerold@b-c-s.de>
+
+[ Upstream commit d26ed6b0e5e23190d43ab34bc69cbecdc464a2cf ]
+
+This patch fixes corruption of the skb_shared_info area, which occurs
+upon reception of 4K jumbo packets.
+
+Originally, build_skb() was used to reuse the page for the skb and
+eliminate the need for extra fragments. But that logic does not take
+into account that space for skb_shared_info must be reserved at the end
+of the skb data area.
+
+When the packet data consumes the whole page (4K), the skb_shinfo
+location overflows the page. As a consequence, __build_skb() zeroes the
+shinfo data beyond the allocated page, corrupting the next page.
+
+The issue is rarely seen in real life because jumbo frames are normally
+larger than 4K, which triggers another code path.
+But it is 100% reproducible with a simple scapy packet, like:
+
+    sendp(IP(dst="192.168.100.3") / TCP(dport=443) \
+          / Raw(RandString(size=(4096-40))), iface="enp1s0")
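+
+The guard added below can be illustrated with a small stand-alone
+sketch. RX_BUF_SIZE and SHINFO_RESERVE stand in for the driver's
+AQ_CFG_RX_FRAME_MAX and AQ_SKB_ALIGN; the numeric values are assumed
+for illustration only:
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    #define RX_BUF_SIZE    4096u  /* assumed single-page RX buffer */
+    #define SHINFO_RESERVE  320u  /* assumed skb_shared_info reserve */
+
+    /* use the single-page build_skb() path only when the payload
+     * leaves room for skb_shared_info at the end of the page
+     */
+    static bool can_use_build_skb(unsigned int len)
+    {
+            return len <= RX_BUF_SIZE - SHINFO_RESERVE;
+    }
+
+    int main(void)
+    {
+            printf("1500-byte frame: %d\n", can_use_build_skb(1500));
+            printf("4056-byte frame: %d\n", can_use_build_skb(4056));
+            return 0;
+    }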
+
+Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code")
+
+Reported-by: Friedemann Gerold <f.gerold@b-c-s.de>
+Reported-by: Michael Rauch <michael@rauch.be>
+Signed-off-by: Friedemann Gerold <f.gerold@b-c-s.de>
+Tested-by: Nikita Danilov <nikita.danilov@aquantia.com>
+Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/aquantia/atlantic/aq_ring.c |   32 ++++++++++++-----------
+ 1 file changed, 18 insertions(+), 14 deletions(-)
+
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+@@ -225,9 +225,10 @@ int aq_ring_rx_clean(struct aq_ring_s *s
+               }
+               /* for single fragment packets use build_skb() */
+-              if (buff->is_eop) {
++              if (buff->is_eop &&
++                  buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) {
+                       skb = build_skb(page_address(buff->page),
+-                                      buff->len + AQ_SKB_ALIGN);
++                                      AQ_CFG_RX_FRAME_MAX);
+                       if (unlikely(!skb)) {
+                               err = -ENOMEM;
+                               goto err_exit;
+@@ -247,18 +248,21 @@ int aq_ring_rx_clean(struct aq_ring_s *s
+                                       buff->len - ETH_HLEN,
+                                       SKB_TRUESIZE(buff->len - ETH_HLEN));
+-                      for (i = 1U, next_ = buff->next,
+-                           buff_ = &self->buff_ring[next_]; true;
+-                           next_ = buff_->next,
+-                           buff_ = &self->buff_ring[next_], ++i) {
+-                              skb_add_rx_frag(skb, i, buff_->page, 0,
+-                                              buff_->len,
+-                                              SKB_TRUESIZE(buff->len -
+-                                              ETH_HLEN));
+-                              buff_->is_cleaned = 1;
+-
+-                              if (buff_->is_eop)
+-                                      break;
++                      if (!buff->is_eop) {
++                              for (i = 1U, next_ = buff->next,
++                                   buff_ = &self->buff_ring[next_];
++                                   true; next_ = buff_->next,
++                                   buff_ = &self->buff_ring[next_], ++i) {
++                                      skb_add_rx_frag(skb, i,
++                                                      buff_->page, 0,
++                                                      buff_->len,
++                                                      SKB_TRUESIZE(buff->len -
++                                                      ETH_HLEN));
++                                      buff_->is_cleaned = 1;
++
++                                      if (buff_->is_eop)
++                                              break;
++                              }
+                       }
+               }
diff --git a/queue-4.18/net-dsa-b53-keep-cpu-port-as-tagged-in-all-vlans.patch b/queue-4.18/net-dsa-b53-keep-cpu-port-as-tagged-in-all-vlans.patch
new file mode 100644 (file)
index 0000000..2542a37
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 4 Oct 2018 20:24:13 -0700
+Subject: net: dsa: b53: Keep CPU port as tagged in all VLANs
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit ca8931948344c485569b04821d1f6bcebccd376b ]
+
+Commit c499696e7901 ("net: dsa: b53: Stop using dev->cpu_port
+incorrectly") was a bit too trigger-happy in removing the CPU port from
+the VLAN membership, because we rely on DSA to program the CPU port
+VLAN. DSA does do that, except it does not concern itself with
+tagged/untagged and just uses untagged.
+
+Having the CPU port "follow" the user ports' tagged/untagged setting is
+not great and does not allow for proper differentiation, so keep the
+CPU port tagged in all VLANs.
+
+Reported-by: Gerhard Wiesinger <lists@wiesinger.com>
+Fixes: c499696e7901 ("net: dsa: b53: Stop using dev->cpu_port incorrectly")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/b53/b53_common.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -1107,7 +1107,7 @@ void b53_vlan_add(struct dsa_switch *ds,
+               b53_get_vlan_entry(dev, vid, vl);
+               vl->members |= BIT(port);
+-              if (untagged)
++              if (untagged && !dsa_is_cpu_port(ds, port))
+                       vl->untag |= BIT(port);
+               else
+                       vl->untag &= ~BIT(port);
+@@ -1149,7 +1149,7 @@ int b53_vlan_del(struct dsa_switch *ds,
+                               pvid = 0;
+               }
+-              if (untagged)
++              if (untagged && !dsa_is_cpu_port(ds, port))
+                       vl->untag &= ~(BIT(port));
+               b53_set_vlan_entry(dev, vid, vl);
diff --git a/queue-4.18/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch b/queue-4.18/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch
new file mode 100644 (file)
index 0000000..c676963
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 9 Oct 2018 16:48:58 -0700
+Subject: net: dsa: bcm_sf2: Call setup during switch resume
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 54baca096386d862d19c10f58f34bf787c6b3cbe ]
+
+There is no reason to open code what the switch setup function does. In
+fact, because we just issued a switch reset, all the registers are back
+to their default values, which includes, for instance, having unused
+ports enabled again, wasting power, and an inappropriate switch core
+clock being selected.
+
+Fixes: 8cfa94984c9c ("net: dsa: bcm_sf2: add suspend/resume callbacks")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |   10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -698,7 +698,6 @@ static int bcm_sf2_sw_suspend(struct dsa
+ static int bcm_sf2_sw_resume(struct dsa_switch *ds)
+ {
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+-      unsigned int port;
+       int ret;
+       ret = bcm_sf2_sw_rst(priv);
+@@ -710,14 +709,7 @@ static int bcm_sf2_sw_resume(struct dsa_
+       if (priv->hw_params.num_gphy == 1)
+               bcm_sf2_gphy_enable_set(ds, true);
+-      for (port = 0; port < DSA_MAX_PORTS; port++) {
+-              if (dsa_is_user_port(ds, port))
+-                      bcm_sf2_port_setup(ds, port, NULL);
+-              else if (dsa_is_cpu_port(ds, port))
+-                      bcm_sf2_imp_setup(ds, port);
+-      }
+-
+-      bcm_sf2_enable_acb(ds);
++      ds->ops->setup(ds);
+       return 0;
+ }
diff --git a/queue-4.18/net-dsa-bcm_sf2-fix-unbind-ordering.patch b/queue-4.18/net-dsa-bcm_sf2-fix-unbind-ordering.patch
new file mode 100644 (file)
index 0000000..c29fa44
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 9 Oct 2018 16:48:57 -0700
+Subject: net: dsa: bcm_sf2: Fix unbind ordering
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit bf3b452b7af787b8bf27de6490dc4eedf6f97599 ]
+
+The order in which we release resources is unfortunately leading to bus
+errors while dismantling the port. This is because we set
+priv->wol_ports_mask to 0 to tell bcm_sf2_sw_suspend() that it is now
+permissible to clock gate the switch. Later on, when dsa_slave_destroy()
+comes in from dsa_unregister_switch() and calls
+dsa_switch_ops::port_disable, we perform the same dismantling again, and
+this time we hit registers that are clock gated.
+
+Make sure that dsa_unregister_switch() is the first thing that happens,
+which takes care of releasing all user-visible resources, then proceed
+with clock-gating the hardware. We still need to set
+priv->wol_ports_mask to 0 to make sure that an enabled port properly
+gets disabled in case it was previously used as part of Wake-on-LAN.
+
+Fixes: d9338023fb8e ("net: dsa: bcm_sf2: Make it a real platform device driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -1160,10 +1160,10 @@ static int bcm_sf2_sw_remove(struct plat
+ {
+       struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
+-      /* Disable all ports and interrupts */
+       priv->wol_ports_mask = 0;
+-      bcm_sf2_sw_suspend(priv->dev->ds);
+       dsa_unregister_switch(priv->dev->ds);
++      /* Disable all ports and interrupts */
++      bcm_sf2_sw_suspend(priv->dev->ds);
+       bcm_sf2_mdio_unregister(priv);
+       return 0;
diff --git a/queue-4.18/net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch b/queue-4.18/net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch
new file mode 100644 (file)
index 0000000..4ab7c64
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: "Maciej Żenczykowski" <maze@google.com>
+Date: Sat, 22 Sep 2018 01:34:01 -0700
+Subject: net-ethtool: ETHTOOL_GUFO did not and should not require CAP_NET_ADMIN
+
+From: "Maciej Żenczykowski" <maze@google.com>
+
+[ Upstream commit 474ff2600889e16280dbc6ada8bfecb216169a70 ]
+
+So it should not fail with EPERM even though it is no longer implemented...
+
+This is a fix for:
+  (userns)$ egrep ^Cap /proc/self/status
+  CapInh: 0000003fffffffff
+  CapPrm: 0000003fffffffff
+  CapEff: 0000003fffffffff
+  CapBnd: 0000003fffffffff
+  CapAmb: 0000003fffffffff
+
+  (userns)$ tcpdump -i usb_rndis0
+  tcpdump: WARNING: usb_rndis0: SIOCETHTOOL(ETHTOOL_GUFO) ioctl failed: Operation not permitted
+  Warning: Kernel filter failed: Bad file descriptor
+  tcpdump: can't remove kernel filter: Bad file descriptor
+
+With this change it returns EOPNOTSUPP instead of EPERM.
+
+See also https://github.com/the-tcpdump-group/libpcap/issues/689
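+
+For reference, the affected request can be issued from a minimal
+stand-alone program (a sketch only; the interface name is just the one
+from the log above):
+
+    #include <errno.h>
+    #include <linux/ethtool.h>
+    #include <linux/sockios.h>
+    #include <net/if.h>
+    #include <stdio.h>
+    #include <string.h>
+    #include <sys/ioctl.h>
+    #include <sys/socket.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+            struct ethtool_value eval = { .cmd = ETHTOOL_GUFO };
+            struct ifreq ifr;
+            int fd = socket(AF_INET, SOCK_DGRAM, 0);
+
+            if (fd < 0) {
+                    perror("socket");
+                    return 1;
+            }
+
+            memset(&ifr, 0, sizeof(ifr));
+            strncpy(ifr.ifr_name, "usb_rndis0", IFNAMSIZ - 1);
+            ifr.ifr_data = (void *)&eval;
+
+            /* with this change the ioctl fails with EOPNOTSUPP in an
+             * unprivileged user namespace, not with EPERM
+             */
+            if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
+                    fprintf(stderr, "ETHTOOL_GUFO: %s\n", strerror(errno));
+            else
+                    printf("UFO: %u\n", eval.data);
+
+            close(fd);
+            return 0;
+    }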
+
+Fixes: 08a00fea6de2 ("net: Remove references to NETIF_F_UFO from ethtool.")
+Cc: David S. Miller <davem@davemloft.net>
+Signed-off-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/ethtool.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/ethtool.c
++++ b/net/core/ethtool.c
+@@ -2623,6 +2623,7 @@ int dev_ethtool(struct net *net, struct
+       case ETHTOOL_GPHYSTATS:
+       case ETHTOOL_GTSO:
+       case ETHTOOL_GPERMADDR:
++      case ETHTOOL_GUFO:
+       case ETHTOOL_GGSO:
+       case ETHTOOL_GGRO:
+       case ETHTOOL_GFLAGS:
diff --git a/queue-4.18/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch b/queue-4.18/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch
new file mode 100644 (file)
index 0000000..6895514
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Yunsheng Lin <linyunsheng@huawei.com>
+Date: Tue, 25 Sep 2018 10:21:55 +0100
+Subject: net: hns: fix for unmapping problem when SMMU is on
+
+From: Yunsheng Lin <linyunsheng@huawei.com>
+
+[ Upstream commit 2e9361efa707e186d91b938e44f9e326725259f7 ]
+
+If the SMMU is on, it is more likely that skb_shinfo(skb)->frags[i]
+cannot be sent in a single BD. When this happens, the
+hns_nic_net_xmit_hw function maps the whole data of a frag using
+skb_frag_dma_map, but unmaps each BD's data individually when TX is
+done, which causes problems when the SMMU is on.
+
+This patch fixes the problem by unmapping the whole data of a
+frag when TX is done.
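+
+The accounting behind the fix can be shown with a stand-alone sketch:
+the fragment is mapped once, and only the first BD records the mapped
+length, so completion unmaps it exactly once. The per-BD limit below is
+an assumed value for illustration, not necessarily the driver's
+BD_MAX_SEND_SIZE:
+
+    #include <stdio.h>
+
+    #define BD_MAX  2048u   /* assumed per-BD hardware limit */
+
+    int main(void)
+    {
+            unsigned int size = 5000;   /* one large fragment */
+            unsigned int nbds = (size + BD_MAX - 1) / BD_MAX;
+            unsigned int unmap_len = 0;
+
+            for (unsigned int k = 0; k < nbds; k++) {
+                    /* only the first BD keeps the mapped length; the
+                     * others carry 0 so the completion path issues a
+                     * single unmap for the whole fragment
+                     */
+                    unsigned int cb_len = (k == 0) ? size : 0;
+
+                    unmap_len += cb_len;
+            }
+
+            printf("%u BDs, unmap length %u of mapping %u\n",
+                   nbds, unmap_len, size);
+            return 0;
+    }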
+
+Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
+Signed-off-by: Peng Li <lipeng321@huawei.com>
+Reviewed-by: Yisen Zhuang <yisen.zhuang@huawei.com>
+Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns/hnae.c     |    2 -
+ drivers/net/ethernet/hisilicon/hns/hns_enet.c |   30 ++++++++++++++++----------
+ 2 files changed, 20 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
++++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
+@@ -84,7 +84,7 @@ static void hnae_unmap_buffer(struct hna
+       if (cb->type == DESC_TYPE_SKB)
+               dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
+                                ring_to_dma_dir(ring));
+-      else
++      else if (cb->length)
+               dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
+                              ring_to_dma_dir(ring));
+ }
+--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
++++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+@@ -40,9 +40,9 @@
+ #define SKB_TMP_LEN(SKB) \
+       (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB))
+-static void fill_v2_desc(struct hnae_ring *ring, void *priv,
+-                       int size, dma_addr_t dma, int frag_end,
+-                       int buf_num, enum hns_desc_type type, int mtu)
++static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size,
++                          int send_sz, dma_addr_t dma, int frag_end,
++                          int buf_num, enum hns_desc_type type, int mtu)
+ {
+       struct hnae_desc *desc = &ring->desc[ring->next_to_use];
+       struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
+@@ -64,7 +64,7 @@ static void fill_v2_desc(struct hnae_rin
+       desc_cb->type = type;
+       desc->addr = cpu_to_le64(dma);
+-      desc->tx.send_size = cpu_to_le16((u16)size);
++      desc->tx.send_size = cpu_to_le16((u16)send_sz);
+       /* config bd buffer end */
+       hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1);
+@@ -133,6 +133,14 @@ static void fill_v2_desc(struct hnae_rin
+       ring_ptr_move_fw(ring, next_to_use);
+ }
++static void fill_v2_desc(struct hnae_ring *ring, void *priv,
++                       int size, dma_addr_t dma, int frag_end,
++                       int buf_num, enum hns_desc_type type, int mtu)
++{
++      fill_v2_desc_hw(ring, priv, size, size, dma, frag_end,
++                      buf_num, type, mtu);
++}
++
+ static const struct acpi_device_id hns_enet_acpi_match[] = {
+       { "HISI00C1", 0 },
+       { "HISI00C2", 0 },
+@@ -289,15 +297,15 @@ static void fill_tso_desc(struct hnae_ri
+       /* when the frag size is bigger than hardware, split this frag */
+       for (k = 0; k < frag_buf_num; k++)
+-              fill_v2_desc(ring, priv,
+-                           (k == frag_buf_num - 1) ?
++              fill_v2_desc_hw(ring, priv, k == 0 ? size : 0,
++                              (k == frag_buf_num - 1) ?
+                                       sizeoflast : BD_MAX_SEND_SIZE,
+-                           dma + BD_MAX_SEND_SIZE * k,
+-                           frag_end && (k == frag_buf_num - 1) ? 1 : 0,
+-                           buf_num,
+-                           (type == DESC_TYPE_SKB && !k) ?
++                              dma + BD_MAX_SEND_SIZE * k,
++                              frag_end && (k == frag_buf_num - 1) ? 1 : 0,
++                              buf_num,
++                              (type == DESC_TYPE_SKB && !k) ?
+                                       DESC_TYPE_SKB : DESC_TYPE_PAGE,
+-                           mtu);
++                              mtu);
+ }
+ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev,
diff --git a/queue-4.18/net-ipv4-don-t-let-pmtu-updates-increase-route-mtu.patch b/queue-4.18/net-ipv4-don-t-let-pmtu-updates-increase-route-mtu.patch
new file mode 100644 (file)
index 0000000..7cb1e00
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Tue, 9 Oct 2018 17:48:15 +0200
+Subject: net: ipv4: don't let PMTU updates increase route MTU
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 28d35bcdd3925e7293408cdb8aa5f2aac5f0d6e3 ]
+
+When an MTU update with PMTU smaller than net.ipv4.route.min_pmtu is
+received, we must clamp its value. However, we can receive a PMTU
+exception with PMTU < old_mtu < ip_rt_min_pmtu, which would lead to an
+increase in PMTU.
+
+To fix this, take the smallest of the old MTU and ip_rt_min_pmtu.
+
+Before this patch, in case of an update, the exception's MTU would
+always change. Now, an exception can have only its lock flag updated,
+but not the MTU, so we need to add a check on locking to the following
+"is this exception getting updated, or close to expiring?" test.
+
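+The clamping behaviour after the fix can be expressed as a small
+stand-alone helper (a sketch that ignores the lock flag; 552 is only
+the usual default of net.ipv4.route.min_pmtu):
+
+    #include <stdio.h>
+
+    /* never raise the PMTU: updates above the old value are ignored,
+     * and values below min_pmtu are clamped to min(old_mtu, min_pmtu)
+     */
+    static unsigned int update_pmtu(unsigned int old_mtu, unsigned int mtu,
+                                    unsigned int min_pmtu)
+    {
+            if (old_mtu < mtu)
+                    return old_mtu;
+            if (mtu < min_pmtu)
+                    return old_mtu < min_pmtu ? old_mtu : min_pmtu;
+            return mtu;
+    }
+
+    int main(void)
+    {
+            /* PMTU < old_mtu < min_pmtu: must stay at 500, not grow to 552 */
+            printf("%u\n", update_pmtu(500, 300, 552));
+            return 0;
+    }
+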
+Fixes: d52e5a7e7ca4 ("ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1001,21 +1001,22 @@ out:   kfree_skb(skb);
+ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+ {
+       struct dst_entry *dst = &rt->dst;
++      u32 old_mtu = ipv4_mtu(dst);
+       struct fib_result res;
+       bool lock = false;
+       if (ip_mtu_locked(dst))
+               return;
+-      if (ipv4_mtu(dst) < mtu)
++      if (old_mtu < mtu)
+               return;
+       if (mtu < ip_rt_min_pmtu) {
+               lock = true;
+-              mtu = ip_rt_min_pmtu;
++              mtu = min(old_mtu, ip_rt_min_pmtu);
+       }
+-      if (rt->rt_pmtu == mtu &&
++      if (rt->rt_pmtu == mtu && !lock &&
+           time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
+               return;
diff --git a/queue-4.18/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch b/queue-4.18/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch
new file mode 100644 (file)
index 0000000..c2ef1e4
--- /dev/null
@@ -0,0 +1,218 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Tue, 9 Oct 2018 17:48:14 +0200
+Subject: net: ipv4: update fnhe_pmtu when first hop's MTU changes
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit af7d6cce53694a88d6a1bb60c9a239a6a5144459 ]
+
+Since commit 5aad1de5ea2c ("ipv4: use separate genid for next hop
+exceptions"), exceptions get deprecated separately from cached
+routes. In particular, administrative changes don't clear PMTU anymore.
+
+As Stefano described in commit e9fa1495d738 ("ipv6: Reflect MTU changes
+on PMTU of exceptions for MTU-less routes"), the PMTU discovered before
+the local MTU change can become stale:
+ - if the local MTU is now lower than the PMTU, that PMTU is now
+   incorrect
+ - if the local MTU was the lowest value in the path, and is increased,
+   we might discover a higher PMTU
+
+Similarly to what commit e9fa1495d738 did for IPv6, update PMTU in those
+cases.
+
+If the exception was locked, the discovered PMTU was smaller than the
+minimal accepted PMTU. In that case, if the new local MTU is smaller
+than the current PMTU, let PMTU discovery figure out if locking of the
+exception is still needed.
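+
+These rules amount to the following decision helper (a stand-alone
+sketch mirroring the nh_update_mtu() logic added below):
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    struct exception {
+            unsigned int pmtu;
+            bool locked;
+    };
+
+    /* apply a first-hop MTU change from "orig" to "new_mtu" to one
+     * cached PMTU exception
+     */
+    static void update_exception(struct exception *e, unsigned int new_mtu,
+                                 unsigned int orig)
+    {
+            if (e->locked) {
+                    /* unlock only if the new MTU does not exceed the PMTU */
+                    if (new_mtu <= e->pmtu) {
+                            e->pmtu = new_mtu;
+                            e->locked = false;
+                    }
+            } else if (new_mtu < e->pmtu || orig == e->pmtu) {
+                    /* the new MTU is the limit now, or the old one was */
+                    e->pmtu = new_mtu;
+            }
+    }
+
+    int main(void)
+    {
+            struct exception e = { .pmtu = 1500, .locked = false };
+
+            update_exception(&e, 1400, 1500);   /* local MTU lowered */
+            printf("pmtu=%u locked=%d\n", e.pmtu, e.locked);
+            return 0;
+    }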
+
+To do this, we need to know the old link MTU in the NETDEV_CHANGEMTU
+notifier. By the time the notifier is called, dev->mtu has been
+changed. This patch adds the old MTU as additional information in the
+notifier structure, and a new call_netdevice_notifiers_mtu() function.
+
+Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h |    7 ++++++
+ include/net/ip_fib.h      |    1 
+ net/core/dev.c            |   28 +++++++++++++++++++++++--
+ net/ipv4/fib_frontend.c   |   12 +++++++----
+ net/ipv4/fib_semantics.c  |   50 ++++++++++++++++++++++++++++++++++++++++++++++
+ 5 files changed, 92 insertions(+), 6 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2420,6 +2420,13 @@ struct netdev_notifier_info {
+       struct netlink_ext_ack  *extack;
+ };
++struct netdev_notifier_info_ext {
++      struct netdev_notifier_info info; /* must be first */
++      union {
++              u32 mtu;
++      } ext;
++};
++
+ struct netdev_notifier_change_info {
+       struct netdev_notifier_info info; /* must be first */
+       unsigned int flags_changed;
+--- a/include/net/ip_fib.h
++++ b/include/net/ip_fib.h
+@@ -394,6 +394,7 @@ int ip_fib_check_default(__be32 gw, stru
+ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
+ int fib_sync_down_addr(struct net_device *dev, __be32 local);
+ int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
++void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1754,6 +1754,28 @@ int call_netdevice_notifiers(unsigned lo
+ }
+ EXPORT_SYMBOL(call_netdevice_notifiers);
++/**
++ *    call_netdevice_notifiers_mtu - call all network notifier blocks
++ *    @val: value passed unmodified to notifier function
++ *    @dev: net_device pointer passed unmodified to notifier function
++ *    @arg: additional u32 argument passed to the notifier function
++ *
++ *    Call all network notifier blocks.  Parameters and return value
++ *    are as for raw_notifier_call_chain().
++ */
++static int call_netdevice_notifiers_mtu(unsigned long val,
++                                      struct net_device *dev, u32 arg)
++{
++      struct netdev_notifier_info_ext info = {
++              .info.dev = dev,
++              .ext.mtu = arg,
++      };
++
++      BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
++
++      return call_netdevice_notifiers_info(val, &info.info);
++}
++
+ #ifdef CONFIG_NET_INGRESS
+ static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
+@@ -7118,14 +7140,16 @@ int dev_set_mtu(struct net_device *dev,
+       err = __dev_set_mtu(dev, new_mtu);
+       if (!err) {
+-              err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
++              err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
++                                                 orig_mtu);
+               err = notifier_to_errno(err);
+               if (err) {
+                       /* setting mtu back and notifying everyone again,
+                        * so that they have a chance to revert changes.
+                        */
+                       __dev_set_mtu(dev, orig_mtu);
+-                      call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
++                      call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
++                                                   new_mtu);
+               }
+       }
+       return err;
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -1243,7 +1243,8 @@ static int fib_inetaddr_event(struct not
+ static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+-      struct netdev_notifier_changeupper_info *info;
++      struct netdev_notifier_changeupper_info *upper_info = ptr;
++      struct netdev_notifier_info_ext *info_ext = ptr;
+       struct in_device *in_dev;
+       struct net *net = dev_net(dev);
+       unsigned int flags;
+@@ -1278,16 +1279,19 @@ static int fib_netdev_event(struct notif
+                       fib_sync_up(dev, RTNH_F_LINKDOWN);
+               else
+                       fib_sync_down_dev(dev, event, false);
+-              /* fall through */
++              rt_cache_flush(net);
++              break;
+       case NETDEV_CHANGEMTU:
++              fib_sync_mtu(dev, info_ext->ext.mtu);
+               rt_cache_flush(net);
+               break;
+       case NETDEV_CHANGEUPPER:
+-              info = ptr;
++              upper_info = ptr;
+               /* flush all routes if dev is linked to or unlinked from
+                * an L3 master device (e.g., VRF)
+                */
+-              if (info->upper_dev && netif_is_l3_master(info->upper_dev))
++              if (upper_info->upper_dev &&
++                  netif_is_l3_master(upper_info->upper_dev))
+                       fib_disable_ip(dev, NETDEV_DOWN, true);
+               break;
+       }
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -1470,6 +1470,56 @@ static int call_fib_nh_notifiers(struct
+       return NOTIFY_DONE;
+ }
++/* Update the PMTU of exceptions when:
++ * - the new MTU of the first hop becomes smaller than the PMTU
++ * - the old MTU was the same as the PMTU, and it limited discovery of
++ *   larger MTUs on the path. With that limit raised, we can now
++ *   discover larger MTUs
++ * A special case is locked exceptions, for which the PMTU is smaller
++ * than the minimal accepted PMTU:
++ * - if the new MTU is greater than the PMTU, don't make any change
++ * - otherwise, unlock and set PMTU
++ */
++static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
++{
++      struct fnhe_hash_bucket *bucket;
++      int i;
++
++      bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
++      if (!bucket)
++              return;
++
++      for (i = 0; i < FNHE_HASH_SIZE; i++) {
++              struct fib_nh_exception *fnhe;
++
++              for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
++                   fnhe;
++                   fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
++                      if (fnhe->fnhe_mtu_locked) {
++                              if (new <= fnhe->fnhe_pmtu) {
++                                      fnhe->fnhe_pmtu = new;
++                                      fnhe->fnhe_mtu_locked = false;
++                              }
++                      } else if (new < fnhe->fnhe_pmtu ||
++                                 orig == fnhe->fnhe_pmtu) {
++                              fnhe->fnhe_pmtu = new;
++                      }
++              }
++      }
++}
++
++void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
++{
++      unsigned int hash = fib_devindex_hashfn(dev->ifindex);
++      struct hlist_head *head = &fib_info_devhash[hash];
++      struct fib_nh *nh;
++
++      hlist_for_each_entry(nh, head, nh_hash) {
++              if (nh->nh_dev == dev)
++                      nh_update_mtu(nh, dev->mtu, orig_mtu);
++      }
++}
++
+ /* Event              force Flags           Description
+  * NETDEV_CHANGE      0     LINKDOWN        Carrier OFF, not for scope host
+  * NETDEV_DOWN        0     LINKDOWN|DEAD   Link down, not for scope host
diff --git a/queue-4.18/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch b/queue-4.18/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch
new file mode 100644 (file)
index 0000000..72cf48d
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Jeff Barnhill <0xeffeff@gmail.com>
+Date: Fri, 21 Sep 2018 00:45:27 +0000
+Subject: net/ipv6: Display all addresses in output of /proc/net/if_inet6
+
+From: Jeff Barnhill <0xeffeff@gmail.com>
+
+[ Upstream commit 86f9bd1ff61c413a2a251fa736463295e4e24733 ]
+
+The backend handling for /proc/net/if_inet6 in addrconf.c doesn't properly
+handle starting/stopping the iteration.  The problem is that at some point
+during the iteration, an overflow is detected and the process is
+subsequently stopped.  The item being shown via seq_printf() when the
+overflow occurs is not actually shown, though.  When start() is
+subsequently called to resume iterating, it returns the next item, and
+thus the item that was being processed when the overflow occurred never
+gets printed.
+
+Alter the meaning of the private data member "offset".  Currently, when it
+is not 0 (which only happens at the very beginning), "offset" represents
+the next hlist item to be printed.  After this change, "offset" always
+represents the current item.
+
+This is also consistent with the private data member "bucket", which
+represents the current bucket, and also the use of "pos" as defined in
+seq_file.txt:
+    The pos passed to start() will always be either zero, or the most
+    recent pos used in the previous session.
+
+Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -4203,7 +4203,6 @@ static struct inet6_ifaddr *if6_get_firs
+                               p++;
+                               continue;
+                       }
+-                      state->offset++;
+                       return ifa;
+               }
+@@ -4227,13 +4226,12 @@ static struct inet6_ifaddr *if6_get_next
+               return ifa;
+       }
++      state->offset = 0;
+       while (++state->bucket < IN6_ADDR_HSIZE) {
+-              state->offset = 0;
+               hlist_for_each_entry_rcu(ifa,
+                                    &inet6_addr_lst[state->bucket], addr_lst) {
+                       if (!net_eq(dev_net(ifa->idev->dev), net))
+                               continue;
+-                      state->offset++;
+                       return ifa;
+               }
+       }
diff --git a/queue-4.18/net-ipv6-remove-extra-call-to-ip6_convert_metrics-for-multipath-case.patch b/queue-4.18/net-ipv6-remove-extra-call-to-ip6_convert_metrics-for-multipath-case.patch
new file mode 100644 (file)
index 0000000..b30ff7c
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: David Ahern <dsahern@gmail.com>
+Date: Wed, 26 Sep 2018 17:35:14 -0700
+Subject: net/ipv6: Remove extra call to ip6_convert_metrics for multipath case
+
+From: David Ahern <dsahern@gmail.com>
+
+[ Upstream commit 36f19d5b4f99fa9fa8263877e5f8e669d7fddc14 ]
+
+The change to move metrics from the dst to rt6_info moved the call
+to ip6_convert_metrics from ip6_route_add to ip6_route_info_create. In
+doing so, it makes the call in ip6_route_info_append() redundant and
+actually leaks the metrics installed as part of ip6_route_info_create().
+Remove the now unnecessary call.
+
+Fixes: d4ead6b34b67f ("net/ipv6: move metrics from dst to rt6_info")
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |    5 -----
+ 1 file changed, 5 deletions(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -4314,11 +4314,6 @@ static int ip6_route_info_append(struct
+       if (!nh)
+               return -ENOMEM;
+       nh->fib6_info = rt;
+-      err = ip6_convert_metrics(net, rt, r_cfg);
+-      if (err) {
+-              kfree(nh);
+-              return err;
+-      }
+       memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
+       list_add_tail(&nh->next, rt6_nh_list);
diff --git a/queue-4.18/net-ipv6-stop-leaking-percpu-memory-in-fib6-info.patch b/queue-4.18/net-ipv6-stop-leaking-percpu-memory-in-fib6-info.patch
new file mode 100644 (file)
index 0000000..a38df75
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Mike Rapoport <rppt@linux.vnet.ibm.com>
+Date: Tue, 9 Oct 2018 07:02:01 +0300
+Subject: net/ipv6: stop leaking percpu memory in fib6 info
+
+From: Mike Rapoport <rppt@linux.vnet.ibm.com>
+
+[ Upstream commit 7abab7b9b498650404800a08765f44929fee8f31 ]
+
+The fib6_info_alloc() function allocates percpu memory to hold per CPU
+pointers to rt6_info, but this memory is never freed. Fix it.
+
+Fixes: a64efe142f5e ("net/ipv6: introduce fib6_info struct and helpers")
+Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -196,6 +196,8 @@ void fib6_info_destroy_rcu(struct rcu_he
+                               *ppcpu_rt = NULL;
+                       }
+               }
++
++              free_percpu(f6i->rt6i_pcpu);
+       }
+       lwtstate_put(f6i->fib6_nh.nh_lwtstate);
diff --git a/queue-4.18/net-mlx5-check-for-sq-and-not-rq-state-when-modifying-hairpin-sq.patch b/queue-4.18/net-mlx5-check-for-sq-and-not-rq-state-when-modifying-hairpin-sq.patch
new file mode 100644 (file)
index 0000000..0a5fefa
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Alaa Hleihel <alaa@mellanox.com>
+Date: Mon, 3 Sep 2018 10:38:14 +0300
+Subject: net/mlx5: Check for SQ and not RQ state when modifying hairpin SQ
+
+From: Alaa Hleihel <alaa@mellanox.com>
+
+[ Upstream commit 6b359d5550a1ae7a1269c9dc1dd73dfdc4d6fe58 ]
+
+When modifying the hairpin SQ, instead of checking whether the next
+state equals MLX5_SQC_STATE_RDY, we compare it against the
+MLX5_RQC_STATE_RDY enum value.
+
+The code worked since both of MLX5_RQC_STATE_RDY and MLX5_SQC_STATE_RDY
+have the same value today.
+
+This patch fixes this issue.
+
+Fixes: 18e568c390c6 ("net/mlx5: Hairpin pair core object setup")
+Change-Id: I6758aa7b4bd137966ae28206b70648c5bc223b46
+Signed-off-by: Alaa Hleihel <alaa@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/transobj.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+@@ -509,7 +509,7 @@ static int mlx5_hairpin_modify_sq(struct
+       sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+-      if (next_state == MLX5_RQC_STATE_RDY) {
++      if (next_state == MLX5_SQC_STATE_RDY) {
+               MLX5_SET(sqc, sqc, hairpin_peer_rq, peer_rq);
+               MLX5_SET(sqc, sqc, hairpin_peer_vhca, peer_vhca);
+       }
diff --git a/queue-4.18/net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch b/queue-4.18/net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch
new file mode 100644 (file)
index 0000000..9ed707f
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Sun, 16 Sep 2018 14:45:27 +0300
+Subject: net/mlx5: E-Switch, Fix out of bound access when setting vport rate
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+[ Upstream commit 11aa5800ed66ed0415b7509f02881c76417d212a ]
+
+The code that deals with eswitch vport bw guarantee was going beyond the
+eswitch vport array limit, fix that.  This was pointed out by the kernel
+address sanitizer (KASAN).
+
+The error from KASAN log:
+[2018-09-15 15:04:45] BUG: KASAN: slab-out-of-bounds in
+mlx5_eswitch_set_vport_rate+0x8c1/0xae0 [mlx5_core]
+
+Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -2000,7 +2000,7 @@ static u32 calculate_vports_min_rate_div
+       u32 max_guarantee = 0;
+       int i;
+-      for (i = 0; i <= esw->total_vports; i++) {
++      for (i = 0; i < esw->total_vports; i++) {
+               evport = &esw->vports[i];
+               if (!evport->enabled || evport->info.min_rate < max_guarantee)
+                       continue;
+@@ -2020,7 +2020,7 @@ static int normalize_vports_min_rate(str
+       int err;
+       int i;
+-      for (i = 0; i <= esw->total_vports; i++) {
++      for (i = 0; i < esw->total_vports; i++) {
+               evport = &esw->vports[i];
+               if (!evport->enabled)
+                       continue;
diff --git a/queue-4.18/net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch b/queue-4.18/net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch
new file mode 100644 (file)
index 0000000..334176a
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Jianbo Liu <jianbol@mellanox.com>
+Date: Sat, 25 Aug 2018 03:29:58 +0000
+Subject: net/mlx5e: Set vlan masks for all offloaded TC rules
+
+From: Jianbo Liu <jianbol@mellanox.com>
+
+[ Upstream commit cee26487620bc9bc3c7db21b6984d91f7bae12ae ]
+
+In flow steering, if asked to, the hardware matches on the first ethertype
+which is not vlan. It's possible to set a rule as follows, which is
+meant to match on untagged packets, but will also match on vlan packets:
+    tc filter add dev eth0 parent ffff: protocol ip flower ...
+
+To avoid this for packets with a single tag, we set the vlan masks to
+tell the hardware to check the tags for every matched packet.
+
+Fixes: 095b6cfd69ce ('net/mlx5e: Add TC vlan match parsing')
+Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1339,6 +1339,9 @@ static int __parse_cls_flower(struct mlx
+                       *match_level = MLX5_MATCH_L2;
+               }
++      } else {
++              MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1);
++              MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
+       }
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
diff --git a/queue-4.18/net-mscc-fix-the-frame-extraction-into-the-skb.patch b/queue-4.18/net-mscc-fix-the-frame-extraction-into-the-skb.patch
new file mode 100644 (file)
index 0000000..d8e4cca
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Antoine Tenart <antoine.tenart@bootlin.com>
+Date: Thu, 20 Sep 2018 12:08:54 +0200
+Subject: net: mscc: fix the frame extraction into the skb
+
+From: Antoine Tenart <antoine.tenart@bootlin.com>
+
+[ Upstream commit 652ef42c134da1bbb03bd4c9b4291dfaf8d7febb ]
+
+When extracting frames from the Ocelot switch, the frame check sequence
+(FCS) is present at the end of the extracted data. The FCS was put into
+the sk buffer, which introduced some issues (such as length-related
+ones), as the FCS shouldn't be part of an Rx sk buffer.
+
+This patch fixes the Ocelot switch extraction behaviour by discarding
+the FCS.
+
+Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support")
+Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mscc/ocelot_board.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mscc/ocelot_board.c
++++ b/drivers/net/ethernet/mscc/ocelot_board.c
+@@ -91,7 +91,7 @@ static irqreturn_t ocelot_xtr_irq_handle
+               struct sk_buff *skb;
+               struct net_device *dev;
+               u32 *buf;
+-              int sz, len;
++              int sz, len, buf_len;
+               u32 ifh[4];
+               u32 val;
+               struct frame_info info;
+@@ -116,14 +116,20 @@ static irqreturn_t ocelot_xtr_irq_handle
+                       err = -ENOMEM;
+                       break;
+               }
+-              buf = (u32 *)skb_put(skb, info.len);
++              buf_len = info.len - ETH_FCS_LEN;
++              buf = (u32 *)skb_put(skb, buf_len);
+               len = 0;
+               do {
+                       sz = ocelot_rx_frame_word(ocelot, grp, false, &val);
+                       *buf++ = val;
+                       len += sz;
+-              } while ((sz == 4) && (len < info.len));
++              } while (len < buf_len);
++
++              /* Read the FCS and discard it */
++              sz = ocelot_rx_frame_word(ocelot, grp, false, &val);
++              /* Update the statistics if part of the FCS was read before */
++              len -= ETH_FCS_LEN - sz;
+               if (sz < 0) {
+                       err = sz;
diff --git a/queue-4.18/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch b/queue-4.18/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch
new file mode 100644 (file)
index 0000000..07cbc99
--- /dev/null
@@ -0,0 +1,66 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Maxime Chevallier <maxime.chevallier@bootlin.com>
+Date: Fri, 5 Oct 2018 09:04:40 +0200
+Subject: net: mvpp2: Extract the correct ethtype from the skb for tx csum offload
+
+From: Maxime Chevallier <maxime.chevallier@bootlin.com>
+
+[ Upstream commit 35f3625c21852ad839f20c91c7d81c4c1101e207 ]
+
+When offloading the L3 and L4 csum computation on TX, we need to extract
+the l3_proto from the ethtype, independently of the presence of a vlan
+tag.
+
+The driver currently uses skb->protocol as-is, resulting in packets
+with the wrong L4 checksum being sent when there's a vlan tag in the
+packet header and checksum offloading is enabled.
+
+This commit makes use of vlan_get_protocol() to get the correct ethtype
+regardless of the presence of a vlan tag.
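+
+The difference can be demonstrated with a stand-alone frame parser (a
+sketch; the offsets simply follow the 802.1Q header layout and are not
+taken from the driver):
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    #define ETHTYPE_8021Q 0x8100
+
+    /* return the L3 ethertype, skipping one optional 802.1Q tag, which
+     * is what vlan_get_protocol() does for a single-tagged frame
+     */
+    static uint16_t l3_ethertype(const uint8_t *frame)
+    {
+            uint16_t proto = (frame[12] << 8) | frame[13];
+
+            if (proto == ETHTYPE_8021Q)
+                    proto = (frame[16] << 8) | frame[17];
+            return proto;
+    }
+
+    int main(void)
+    {
+            uint8_t tagged[18] = {
+                    [12] = 0x81, [13] = 0x00,   /* outer type: 802.1Q  */
+                    [14] = 0x00, [15] = 0x01,   /* VLAN TCI            */
+                    [16] = 0x08, [17] = 0x00,   /* inner type: IPv4    */
+            };
+
+            printf("outer ethertype: 0x%04x, l3 proto: 0x%04x\n",
+                   (tagged[12] << 8) | tagged[13], l3_ethertype(tagged));
+            return 0;
+    }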
+
+Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
+Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+@@ -1725,7 +1725,7 @@ static void mvpp2_txq_desc_put(struct mv
+ }
+ /* Set Tx descriptors fields relevant for CSUM calculation */
+-static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto,
++static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto,
+                              int ip_hdr_len, int l4_proto)
+ {
+       u32 command;
+@@ -2600,14 +2600,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               int ip_hdr_len = 0;
+               u8 l4_proto;
++              __be16 l3_proto = vlan_get_protocol(skb);
+-              if (skb->protocol == htons(ETH_P_IP)) {
++              if (l3_proto == htons(ETH_P_IP)) {
+                       struct iphdr *ip4h = ip_hdr(skb);
+                       /* Calculate IPv4 checksum and L4 checksum */
+                       ip_hdr_len = ip4h->ihl;
+                       l4_proto = ip4h->protocol;
+-              } else if (skb->protocol == htons(ETH_P_IPV6)) {
++              } else if (l3_proto == htons(ETH_P_IPV6)) {
+                       struct ipv6hdr *ip6h = ipv6_hdr(skb);
+                       /* Read l4_protocol from one of IPv6 extra headers */
+@@ -2619,7 +2620,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp
+               }
+               return mvpp2_txq_desc_csum(skb_network_offset(skb),
+-                              skb->protocol, ip_hdr_len, l4_proto);
++                                         l3_proto, ip_hdr_len, l4_proto);
+       }
+       return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE;
diff --git a/queue-4.18/net-mvpp2-fix-a-txq_done-race-condition.patch b/queue-4.18/net-mvpp2-fix-a-txq_done-race-condition.patch
new file mode 100644 (file)
index 0000000..747dffa
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Antoine Tenart <antoine.tenart@bootlin.com>
+Date: Tue, 18 Sep 2018 16:58:47 +0200
+Subject: net: mvpp2: fix a txq_done race condition
+
+From: Antoine Tenart <antoine.tenart@bootlin.com>
+
+[ Upstream commit 774268f3e51b53ed432a1ec516574fd5ba469398 ]
+
+When no Tx IRQ is available, the txq_done() routine (called from
+tx_done()) shouldn't be called from the polling function, as in that
+case it is already called in the Tx path thanks to an hrtimer. This
+mostly occurred when using PPv2.1, as that engine does not have Tx
+IRQs.
+
+Fixes: edc660fa09e2 ("net: mvpp2: replace TX coalescing interrupts with hrtimer")
+Reported-by: Stefan Chulski <stefanc@marvell.com>
+Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+@@ -3056,10 +3056,12 @@ static int mvpp2_poll(struct napi_struct
+                                  cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK);
+       }
+-      cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
+-      if (cause_tx) {
+-              cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET;
+-              mvpp2_tx_done(port, cause_tx, qv->sw_thread_id);
++      if (port->has_tx_irqs) {
++              cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
++              if (cause_tx) {
++                      cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET;
++                      mvpp2_tx_done(port, cause_tx, qv->sw_thread_id);
++              }
+       }
+       /* Process RX packets */
diff --git a/queue-4.18/net-packet-fix-packet-drop-as-of-virtio-gso.patch b/queue-4.18/net-packet-fix-packet-drop-as-of-virtio-gso.patch
new file mode 100644 (file)
index 0000000..8b1e8ed
--- /dev/null
@@ -0,0 +1,79 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
+Date: Sat, 29 Sep 2018 15:41:27 +0000
+Subject: net/packet: fix packet drop as of virtio gso
+
+From: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
+
+[ Upstream commit 9d2f67e43b73e8af7438be219b66a5de0cfa8bd9 ]
+
+When we use a raw socket as the vhost backend, a packet from virtio
+with gso offloading information cannot be sent out, failing later
+validation in the xmit path, as we did not set the correct
+skb->protocol, which is further used for looking up the gso function.
+
+To fix this, set this field according to the virtio hdr information.
+
+Fixes: e858fae2b0b8f4 ("virtio_net: use common code for virtio_net_hdr and skb GSO conversion")
+Signed-off-by: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/virtio_net.h |   18 ++++++++++++++++++
+ net/packet/af_packet.c     |   11 +++++++----
+ 2 files changed, 25 insertions(+), 4 deletions(-)
+
+--- a/include/linux/virtio_net.h
++++ b/include/linux/virtio_net.h
+@@ -5,6 +5,24 @@
+ #include <linux/if_vlan.h>
+ #include <uapi/linux/virtio_net.h>
++static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
++                                         const struct virtio_net_hdr *hdr)
++{
++      switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
++      case VIRTIO_NET_HDR_GSO_TCPV4:
++      case VIRTIO_NET_HDR_GSO_UDP:
++              skb->protocol = cpu_to_be16(ETH_P_IP);
++              break;
++      case VIRTIO_NET_HDR_GSO_TCPV6:
++              skb->protocol = cpu_to_be16(ETH_P_IPV6);
++              break;
++      default:
++              return -EINVAL;
++      }
++
++      return 0;
++}
++
+ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
+                                       const struct virtio_net_hdr *hdr,
+                                       bool little_endian)
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2712,10 +2712,12 @@ tpacket_error:
+                       }
+               }
+-              if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr,
+-                                                            vio_le())) {
+-                      tp_len = -EINVAL;
+-                      goto tpacket_error;
++              if (po->has_vnet_hdr) {
++                      if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
++                              tp_len = -EINVAL;
++                              goto tpacket_error;
++                      }
++                      virtio_net_hdr_set_proto(skb, vnet_hdr);
+               }
+               skb->destructor = tpacket_destruct_skb;
+@@ -2911,6 +2913,7 @@ static int packet_snd(struct socket *soc
+               if (err)
+                       goto out_free;
+               len += sizeof(vnet_hdr);
++              virtio_net_hdr_set_proto(skb, &vnet_hdr);
+       }
+       skb_probe_transport_header(skb, reserve);
diff --git a/queue-4.18/net-phy-phylink-fix-sfp-interface-autodetection.patch b/queue-4.18/net-phy-phylink-fix-sfp-interface-autodetection.patch
new file mode 100644 (file)
index 0000000..66964a3
--- /dev/null
@@ -0,0 +1,106 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Baruch Siach <baruch@tkos.co.il>
+Date: Wed, 3 Oct 2018 19:04:49 +0300
+Subject: net: phy: phylink: fix SFP interface autodetection
+
+From: Baruch Siach <baruch@tkos.co.il>
+
+[ Upstream commit 7e4183752735deb7543e179a44f4f4b44917cd6f ]
+
+When connecting an SFP PHY to phylink, use the detected interface.
+Otherwise, the link fails to come up when the configured 'phy-mode'
+differs from the SFP detected mode.
+
+Move most of phylink_connect_phy() into __phylink_connect_phy(), and
+leave phylink_connect_phy() as a wrapper. phylink_sfp_connect_phy() can
+now pass the SFP detected PHY interface to __phylink_connect_phy().
+
+This fixes link-up of a 1Gb SFP module on eth3 of the Macchiatobin
+board, which is configured in the DT with the "2500base-x" phy-mode.
+
+Fixes: 9525ae83959b6 ("phylink: add phylink infrastructure")
+Suggested-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Baruch Siach <baruch@tkos.co.il>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phylink.c |   48 ++++++++++++++++++++++++++--------------------
+ 1 file changed, 28 insertions(+), 20 deletions(-)
+
+--- a/drivers/net/phy/phylink.c
++++ b/drivers/net/phy/phylink.c
+@@ -717,6 +717,30 @@ static int phylink_bringup_phy(struct ph
+       return 0;
+ }
++static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy,
++              phy_interface_t interface)
++{
++      int ret;
++
++      if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED ||
++                  (pl->link_an_mode == MLO_AN_INBAND &&
++                   phy_interface_mode_is_8023z(interface))))
++              return -EINVAL;
++
++      if (pl->phydev)
++              return -EBUSY;
++
++      ret = phy_attach_direct(pl->netdev, phy, 0, interface);
++      if (ret)
++              return ret;
++
++      ret = phylink_bringup_phy(pl, phy);
++      if (ret)
++              phy_detach(phy);
++
++      return ret;
++}
++
+ /**
+  * phylink_connect_phy() - connect a PHY to the phylink instance
+  * @pl: a pointer to a &struct phylink returned from phylink_create()
+@@ -734,31 +758,13 @@ static int phylink_bringup_phy(struct ph
+  */
+ int phylink_connect_phy(struct phylink *pl, struct phy_device *phy)
+ {
+-      int ret;
+-
+-      if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED ||
+-                  (pl->link_an_mode == MLO_AN_INBAND &&
+-                   phy_interface_mode_is_8023z(pl->link_interface))))
+-              return -EINVAL;
+-
+-      if (pl->phydev)
+-              return -EBUSY;
+-
+       /* Use PHY device/driver interface */
+       if (pl->link_interface == PHY_INTERFACE_MODE_NA) {
+               pl->link_interface = phy->interface;
+               pl->link_config.interface = pl->link_interface;
+       }
+-      ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface);
+-      if (ret)
+-              return ret;
+-
+-      ret = phylink_bringup_phy(pl, phy);
+-      if (ret)
+-              phy_detach(phy);
+-
+-      return ret;
++      return __phylink_connect_phy(pl, phy, pl->link_interface);
+ }
+ EXPORT_SYMBOL_GPL(phylink_connect_phy);
+@@ -1672,7 +1678,9 @@ static void phylink_sfp_link_up(void *up
+ static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
+ {
+-      return phylink_connect_phy(upstream, phy);
++      struct phylink *pl = upstream;
++
++      return __phylink_connect_phy(upstream, phy, pl->link_config.interface);
+ }
+ static void phylink_sfp_disconnect_phy(void *upstream)
diff --git a/queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-receive-path.patch b/queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-receive-path.patch
new file mode 100644 (file)
index 0000000..a847a6a
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Date: Tue, 2 Oct 2018 18:52:03 -0600
+Subject: net: qualcomm: rmnet: Fix incorrect allocation flag in receive path
+
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+
+[ Upstream commit ec405641e2b73160e26ef17580d0cf28565d146c ]
+
+The incoming skb needs to be reallocated in case the headroom
+is not sufficient to adjust the Ethernet header. This allocation
+needs to be atomic, otherwise it results in this splat:
+
+ [<600601bb>] ___might_sleep+0x185/0x1a3
+ [<603f6314>] ? _raw_spin_unlock_irqrestore+0x0/0x27
+ [<60069bb0>] ? __wake_up_common_lock+0x95/0xd1
+ [<600602b0>] __might_sleep+0xd7/0xe2
+ [<60065598>] ? enqueue_task_fair+0x112/0x209
+ [<600eea13>] __kmalloc_track_caller+0x5d/0x124
+ [<600ee9b6>] ? __kmalloc_track_caller+0x0/0x124
+ [<602696d5>] __kmalloc_reserve.isra.34+0x30/0x7e
+ [<603f629b>] ? _raw_spin_lock_irqsave+0x0/0x3d
+ [<6026b744>] pskb_expand_head+0xbf/0x310
+ [<6025ca6a>] rmnet_rx_handler+0x7e/0x16b
+ [<6025c9ec>] ? rmnet_rx_handler+0x0/0x16b
+ [<6027ad0c>] __netif_receive_skb_core+0x301/0x96f
+ [<60033c17>] ? set_signals+0x0/0x40
+ [<6027bbcb>] __netif_receive_skb+0x24/0x8e
+
+Fixes: 74692caf1b0b ("net: qualcomm: rmnet: Process packets over ethernet")
+Signed-off-by: Sean Tranchetti <stranche@codeaurora.org>
+Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
++++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+@@ -113,7 +113,7 @@ rmnet_map_ingress_handler(struct sk_buff
+       struct sk_buff *skbn;
+       if (skb->dev->type == ARPHRD_ETHER) {
+-              if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_KERNEL)) {
++              if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_ATOMIC)) {
+                       kfree_skb(skb);
+                       return;
+               }
diff --git a/queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-transmit.patch b/queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-transmit.patch
new file mode 100644 (file)
index 0000000..b1d8a9f
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Date: Tue, 2 Oct 2018 18:52:02 -0600
+Subject: net: qualcomm: rmnet: Fix incorrect allocation flag in transmit
+
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+
+[ Upstream commit 6392ff3c8e4c23d0a09b0ae9f94feb3effed490b ]
+
+The incoming skb needs to be reallocated in case the headroom
+is not sufficient to add the MAP header. This allocation needs to
+be atomic, otherwise it results in the following splat:
+
+[32805.801456] BUG: sleeping function called from invalid context
+[32805.841141] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
+[32805.904773] task: ffffffd7c5f62280 task.stack: ffffff80464a8000
+[32805.910851] pc : ___might_sleep+0x180/0x188
+[32805.915143] lr : ___might_sleep+0x180/0x188
+[32806.131520] Call trace:
+[32806.134041]  ___might_sleep+0x180/0x188
+[32806.137980]  __might_sleep+0x50/0x84
+[32806.141653]  __kmalloc_track_caller+0x80/0x3bc
+[32806.146215]  __kmalloc_reserve+0x3c/0x88
+[32806.150241]  pskb_expand_head+0x74/0x288
+[32806.154269]  rmnet_egress_handler+0xb0/0x1d8
+[32806.162239]  rmnet_vnd_start_xmit+0xc8/0x13c
+[32806.166627]  dev_hard_start_xmit+0x148/0x280
+[32806.181181]  sch_direct_xmit+0xa4/0x198
+[32806.185125]  __qdisc_run+0x1f8/0x310
+[32806.188803]  net_tx_action+0x23c/0x26c
+[32806.192655]  __do_softirq+0x220/0x408
+[32806.196420]  do_softirq+0x4c/0x70
+
+Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation")
+Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
++++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+@@ -147,7 +147,7 @@ static int rmnet_map_egress_handler(stru
+       }
+       if (skb_headroom(skb) < required_headroom) {
+-              if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL))
++              if (pskb_expand_head(skb, required_headroom, 0, GFP_ATOMIC))
+                       return -ENOMEM;
+       }
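Both rmnet allocation fixes above come down to the same rule: the packet
receive and transmit handlers run in softirq/atomic context, so any skb head
reallocation must not sleep. A minimal sketch of the pattern, with a
hypothetical helper name (this is not the driver's actual code):

#include <linux/skbuff.h>
#include <linux/gfp.h>

/* Sketch: grow skb headroom from a context that must not sleep. */
static int ensure_headroom_atomic(struct sk_buff *skb, unsigned int needed)
{
	if (skb_headroom(skb) >= needed)
		return 0;

	/* GFP_KERNEL here may sleep and produces the ___might_sleep()
	 * splats quoted above; GFP_ATOMIC is required in rx/tx handlers.
	 */
	return pskb_expand_head(skb, needed, 0, GFP_ATOMIC);
}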
diff --git a/queue-4.18/net-qualcomm-rmnet-skip-processing-loopback-packets.patch b/queue-4.18/net-qualcomm-rmnet-skip-processing-loopback-packets.patch
new file mode 100644 (file)
index 0000000..0b034f1
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Sean Tranchetti <stranche@codeaurora.org>
+Date: Tue, 2 Oct 2018 18:52:01 -0600
+Subject: net: qualcomm: rmnet: Skip processing loopback packets
+
+From: Sean Tranchetti <stranche@codeaurora.org>
+
+[ Upstream commit a07f388e2cde2be74b263f85df6f672fea0305a1 ]
+
+The RMNET RX handler was processing invalid packets that were
+originally sent on the real device and were looped back via
+dev_loopback_xmit(). This was detected using syzkaller.
+
+Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation")
+Signed-off-by: Sean Tranchetti <stranche@codeaurora.org>
+Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
++++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+@@ -189,6 +189,9 @@ rx_handler_result_t rmnet_rx_handler(str
+       if (!skb)
+               goto done;
++      if (skb->pkt_type == PACKET_LOOPBACK)
++              return RX_HANDLER_PASS;
++
+       dev = skb->dev;
+       port = rmnet_get_port(dev);
diff --git a/queue-4.18/net-sched-add-policy-validation-for-tc-attributes.patch b/queue-4.18/net-sched-add-policy-validation-for-tc-attributes.patch
new file mode 100644 (file)
index 0000000..5ae8269
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: David Ahern <dsahern@gmail.com>
+Date: Wed, 3 Oct 2018 15:05:36 -0700
+Subject: net: sched: Add policy validation for tc attributes
+
+From: David Ahern <dsahern@gmail.com>
+
+[ Upstream commit 8b4c3cdd9dd8290343ce959a132d3b334062c5b9 ]
+
+A number of TC attributes are processed without proper validation
+(e.g., length checks). Add a tca policy for all input attributes and use
+it when invoking nlmsg_parse().
+
+The two Fixes tags below cover the latest additions. The other attributes
+are a string (KIND), a nested attribute (OPTIONS, which does seem to have
+validation in most cases), dump-only attributes, or flags.
+
+Fixes: 5bc1701881e39 ("net: sched: introduce multichain support for filters")
+Fixes: d47a6b0e7c492 ("net: sched: introduce ingress/egress block index attributes for qdisc")
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_api.c |   24 ++++++++++++++++++++----
+ 1 file changed, 20 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -1304,6 +1304,18 @@ check_loop_fn(struct Qdisc *q, unsigned
+  * Delete/get qdisc.
+  */
++const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
++      [TCA_KIND]              = { .type = NLA_STRING },
++      [TCA_OPTIONS]           = { .type = NLA_NESTED },
++      [TCA_RATE]              = { .type = NLA_BINARY,
++                                  .len = sizeof(struct tc_estimator) },
++      [TCA_STAB]              = { .type = NLA_NESTED },
++      [TCA_DUMP_INVISIBLE]    = { .type = NLA_FLAG },
++      [TCA_CHAIN]             = { .type = NLA_U32 },
++      [TCA_INGRESS_BLOCK]     = { .type = NLA_U32 },
++      [TCA_EGRESS_BLOCK]      = { .type = NLA_U32 },
++};
++
+ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+                       struct netlink_ext_ack *extack)
+ {
+@@ -1320,7 +1332,8 @@ static int tc_get_qdisc(struct sk_buff *
+           !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+               return -EPERM;
+-      err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
++      err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
++                        extack);
+       if (err < 0)
+               return err;
+@@ -1404,7 +1417,8 @@ static int tc_modify_qdisc(struct sk_buf
+ replay:
+       /* Reinit, just in case something touches this. */
+-      err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
++      err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
++                        extack);
+       if (err < 0)
+               return err;
+@@ -1638,7 +1652,8 @@ static int tc_dump_qdisc(struct sk_buff
+       idx = 0;
+       ASSERT_RTNL();
+-      err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
++      err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
++                        rtm_tca_policy, NULL);
+       if (err < 0)
+               return err;
+@@ -1857,7 +1872,8 @@ static int tc_ctl_tclass(struct sk_buff
+           !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+               return -EPERM;
+-      err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
++      err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
++                        extack);
+       if (err < 0)
+               return err;
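For reference, the general shape of the policy used above: declaring a type
(and, for binary attributes, a length) per attribute lets nlmsg_parse()
reject malformed or truncated attributes up front. A hedged, illustrative
sketch with made-up attribute names and a made-up family header, not the
actual rtm_tca_policy:

#include <net/netlink.h>

enum { MY_A_UNSPEC, MY_A_KIND, MY_A_RATE, __MY_A_MAX };
#define MY_A_MAX (__MY_A_MAX - 1)

struct my_family_hdr { u8 family; u8 pad[3]; };
struct my_estimator { u32 interval; u32 ewma_log; };

static const struct nla_policy my_policy[MY_A_MAX + 1] = {
	[MY_A_KIND] = { .type = NLA_STRING },
	[MY_A_RATE] = { .type = NLA_BINARY,
			.len  = sizeof(struct my_estimator) },
};

static int my_parse(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack)
{
	struct nlattr *tb[MY_A_MAX + 1];

	/* Attributes failing the type/length checks are rejected here
	 * instead of being dereferenced later by the handlers.
	 */
	return nlmsg_parse(nlh, sizeof(struct my_family_hdr), tb, MY_A_MAX,
			   my_policy, extack);
}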
diff --git a/queue-4.18/net-sched-cls_u32-fix-hnode-refcounting.patch b/queue-4.18/net-sched-cls_u32-fix-hnode-refcounting.patch
new file mode 100644 (file)
index 0000000..fd24973
--- /dev/null
@@ -0,0 +1,107 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sun, 7 Oct 2018 07:40:17 -0400
+Subject: net: sched: cls_u32: fix hnode refcounting
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit 6d4c407744dd0338da5d5d76f40dce5adabfb30a ]
+
+cls_u32.c misuses refcounts for struct tc_u_hnode - it counts references
+via ->hlist and via ->tp_root together.  u32_destroy() drops the former
+and, in case when there had been links, leaves the sucker on the list.
+As the result, there's nothing to protect it from getting freed once links
+are dropped.
+That also makes the "is it busy" check incapable of catching the root
+hnode - it *is* busy (there's a reference from tp), but we don't see it as
+something separate.  "Is it our root?" check partially covers that, but
+the problem exists for others' roots as well.
+
+AFAICS, the minimal fix preserving the existing behaviour (where it doesn't
+include oopsen, that is) would be this:
+       * count tp->root and tp_c->hlist as separate references.  I.e.
+         have u32_init() set refcount to 2, not 1.
+       * in u32_destroy() we always drop the former;
+         in u32_destroy_hnode() - the latter.
+
+       That way we have *all* references contributing to refcount.  List
+removal happens in u32_destroy_hnode() (called only when ->refcnt is 1)
+and in u32_destroy() in case of tc_u_common going away, along with
+everything reachable from it.  IOW, that way we know that
+u32_destroy_key() won't free something still on the list (or pointed to by
+someone's ->root).
+
+Reproducer:
+
+tc qdisc add dev eth0 ingress
+tc filter add dev eth0 parent ffff: protocol ip prio 100 handle 1: \
+u32 divisor 1
+tc filter add dev eth0 parent ffff: protocol ip prio 200 handle 2: \
+u32 divisor 1
+tc filter add dev eth0 parent ffff: protocol ip prio 100 \
+handle 1:0:11 u32 ht 1: link 801: offset at 0 mask 0f00 shift 6 \
+plus 0 eat match ip protocol 6 ff
+tc filter delete dev eth0 parent ffff: protocol ip prio 200
+tc filter change dev eth0 parent ffff: protocol ip prio 100 \
+handle 1:0:11 u32 ht 1: link 0: offset at 0 mask 0f00 shift 6 plus 0 \
+eat match ip protocol 6 ff
+tc filter delete dev eth0 parent ffff: protocol ip prio 100
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_u32.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -397,6 +397,7 @@ static int u32_init(struct tcf_proto *tp
+       rcu_assign_pointer(tp_c->hlist, root_ht);
+       root_ht->tp_c = tp_c;
++      root_ht->refcnt++;
+       rcu_assign_pointer(tp->root, root_ht);
+       tp->data = tp_c;
+       return 0;
+@@ -608,7 +609,7 @@ static int u32_destroy_hnode(struct tcf_
+       struct tc_u_hnode __rcu **hn;
+       struct tc_u_hnode *phn;
+-      WARN_ON(ht->refcnt);
++      WARN_ON(--ht->refcnt);
+       u32_clear_hnode(tp, ht, extack);
+@@ -647,7 +648,7 @@ static void u32_destroy(struct tcf_proto
+       WARN_ON(root_ht == NULL);
+-      if (root_ht && --root_ht->refcnt == 0)
++      if (root_ht && --root_ht->refcnt == 1)
+               u32_destroy_hnode(tp, root_ht, extack);
+       if (--tp_c->refcnt == 0) {
+@@ -696,7 +697,6 @@ static int u32_delete(struct tcf_proto *
+       }
+       if (ht->refcnt == 1) {
+-              ht->refcnt--;
+               u32_destroy_hnode(tp, ht, extack);
+       } else {
+               NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
+@@ -706,11 +706,11 @@ static int u32_delete(struct tcf_proto *
+ out:
+       *last = true;
+       if (root_ht) {
+-              if (root_ht->refcnt > 1) {
++              if (root_ht->refcnt > 2) {
+                       *last = false;
+                       goto ret;
+               }
+-              if (root_ht->refcnt == 1) {
++              if (root_ht->refcnt == 2) {
+                       if (!ht_empty(root_ht)) {
+                               *last = false;
+                               goto ret;
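The refcounting scheme described above can be summarized outside the kernel:
with the fix, the root hnode holds two references from birth, one for
tp->root and one for tp_c->hlist, and it may only be unlinked and freed once
both have been dropped. A tiny userspace model of that invariant (a sketch of
the idea only, not the kernel's locking or RCU handling):

#include <stdio.h>

struct hnode { int refcnt; };

static void hnode_put(struct hnode *h, const char *holder)
{
	if (--h->refcnt == 0)
		printf("%s dropped the last reference: safe to unlink and free\n",
		       holder);
	else
		printf("%s dropped its reference: %d left, hnode must stay\n",
		       holder, h->refcnt);
}

int main(void)
{
	/* As after u32_init() with the fix: tp->root and tp_c->hlist each
	 * hold one reference, so refcnt starts at 2 rather than 1.
	 */
	struct hnode root = { .refcnt = 2 };

	hnode_put(&root, "tp->root");	/* filter instance destroyed */
	hnode_put(&root, "tp_c->hlist");	/* hash list reference dropped */
	return 0;
}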
diff --git a/queue-4.18/net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch b/queue-4.18/net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch
new file mode 100644 (file)
index 0000000..c277229
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Jose Abreu <Jose.Abreu@synopsys.com>
+Date: Mon, 17 Sep 2018 09:22:57 +0100
+Subject: net: stmmac: Fixup the tail addr setting in xmit path
+
+From: Jose Abreu <Jose.Abreu@synopsys.com>
+
+[ Upstream commit 0431100b3d82c509729ece1ab22ada2484e209c1 ]
+
+Currently we are always setting the tail address of the descriptor list
+to the end of the pre-allocated list.
+
+According to the databook this is not correct. The tail address should
+point to the last available descriptor + 1, which means we have to update
+the tail address every time we call the xmit function.
+
+This should have no impact on older versions of the MAC, but newer
+versions have some DMA features which allow the IP to fetch descriptors
+in advance and in a non-sequential order, so it is critical that we set
+the tail address correctly.
+
+Signed-off-by: Jose Abreu <joabreu@synopsys.com>
+Fixes: f748be531d70 ("stmmac: support new GMAC4")
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Joao Pinto <jpinto@synopsys.com>
+Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+Cc: Alexandre Torgue <alexandre.torgue@st.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -2193,8 +2193,7 @@ static int stmmac_init_dma_engine(struct
+               stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
+                                   tx_q->dma_tx_phy, chan);
+-              tx_q->tx_tail_addr = tx_q->dma_tx_phy +
+-                          (DMA_TX_SIZE * sizeof(struct dma_desc));
++              tx_q->tx_tail_addr = tx_q->dma_tx_phy;
+               stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
+                                      tx_q->tx_tail_addr, chan);
+       }
+@@ -2971,6 +2970,7 @@ static netdev_tx_t stmmac_tso_xmit(struc
+       netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
++      tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc));
+       stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
+       return NETDEV_TX_OK;
+@@ -3178,6 +3178,8 @@ static netdev_tx_t stmmac_xmit(struct sk
+       netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
+       stmmac_enable_dma_transmission(priv, priv->ioaddr);
++
++      tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc));
+       stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
+       return NETDEV_TX_OK;
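The tail-pointer rule above is plain arithmetic: after queueing, cur_tx
already indexes one past the last descriptor written, so the tail address is
the ring base plus cur_tx descriptors. A standalone illustration with assumed
values (the descriptor size and addresses are examples, not taken from the
hardware):

#include <stdio.h>
#include <stdint.h>

#define DESC_SIZE 16u			/* assumed sizeof(struct dma_desc) */

int main(void)
{
	uint32_t dma_tx_phy = 0x40000000u;	/* ring base (example value) */
	uint32_t cur_tx = 7;			/* next free slot after xmit */
	uint32_t tail_addr = dma_tx_phy + cur_tx * DESC_SIZE;

	/* Programming the end of the pre-allocated ring here instead would
	 * tell newer DMA engines that every descriptor is ready to fetch.
	 */
	printf("tail address = 0x%08x (last used descriptor + 1)\n", tail_addr);
	return 0;
}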
diff --git a/queue-4.18/net-stmmac-rework-coalesce-timer-and-fix-multi-queue-races.patch b/queue-4.18/net-stmmac-rework-coalesce-timer-and-fix-multi-queue-races.patch
new file mode 100644 (file)
index 0000000..c3b7b1f
--- /dev/null
@@ -0,0 +1,540 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Jose Abreu <Jose.Abreu@synopsys.com>
+Date: Mon, 17 Sep 2018 09:22:56 +0100
+Subject: net: stmmac: Rework coalesce timer and fix multi-queue races
+
+From: Jose Abreu <Jose.Abreu@synopsys.com>
+
+[ Upstream commit 8fce3331702316d4bcfeb0771c09ac75d2192bbc ]
+
+This follows David Miller's advice and tries to fix the coalesce timer in
+multi-queue scenarios.
+
+We are now using per-queue coalesce values and a per-queue TX timer.
+
+The coalesce timer default value was changed to 1ms and the default
+coalesce frames to 25.
+
+Tested in a B2B setup between XGMAC2 and GMAC5.
+
+Signed-off-by: Jose Abreu <joabreu@synopsys.com>
+Fixes: ce736788e8a ("net: stmmac: adding multiple buffers for TX")
+Cc: Florian Fainelli <f.fainelli@gmail.com>
+Cc: Neil Armstrong <narmstrong@baylibre.com>
+Cc: Jerome Brunet <jbrunet@baylibre.com>
+Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Joao Pinto <jpinto@synopsys.com>
+Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+Cc: Alexandre Torgue <alexandre.torgue@st.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/common.h      |    4 
+ drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   14 +
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  234 ++++++++++++----------
+ include/linux/stmmac.h                            |    1 
+ 4 files changed, 146 insertions(+), 107 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/common.h
++++ b/drivers/net/ethernet/stmicro/stmmac/common.h
+@@ -256,10 +256,10 @@ struct stmmac_safety_stats {
+ #define MAX_DMA_RIWT          0xff
+ #define MIN_DMA_RIWT          0x20
+ /* Tx coalesce parameters */
+-#define STMMAC_COAL_TX_TIMER  40000
++#define STMMAC_COAL_TX_TIMER  1000
+ #define STMMAC_MAX_COAL_TX_TICK       100000
+ #define STMMAC_TX_MAX_FRAMES  256
+-#define STMMAC_TX_FRAMES      64
++#define STMMAC_TX_FRAMES      25
+ /* Packets types */
+ enum packets_types {
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+@@ -48,6 +48,8 @@ struct stmmac_tx_info {
+ /* Frequently used values are kept adjacent for cache effect */
+ struct stmmac_tx_queue {
++      u32 tx_count_frames;
++      struct timer_list txtimer;
+       u32 queue_index;
+       struct stmmac_priv *priv_data;
+       struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
+@@ -73,7 +75,14 @@ struct stmmac_rx_queue {
+       u32 rx_zeroc_thresh;
+       dma_addr_t dma_rx_phy;
+       u32 rx_tail_addr;
++};
++
++struct stmmac_channel {
+       struct napi_struct napi ____cacheline_aligned_in_smp;
++      struct stmmac_priv *priv_data;
++      u32 index;
++      int has_rx;
++      int has_tx;
+ };
+ struct stmmac_tc_entry {
+@@ -109,14 +118,12 @@ struct stmmac_pps_cfg {
+ struct stmmac_priv {
+       /* Frequently used values are kept adjacent for cache effect */
+-      u32 tx_count_frames;
+       u32 tx_coal_frames;
+       u32 tx_coal_timer;
+       int tx_coalesce;
+       int hwts_tx_en;
+       bool tx_path_in_lpi_mode;
+-      struct timer_list txtimer;
+       bool tso;
+       unsigned int dma_buf_sz;
+@@ -137,6 +144,9 @@ struct stmmac_priv {
+       /* TX Queue */
+       struct stmmac_tx_queue tx_queue[MTL_MAX_TX_QUEUES];
++      /* Generic channel for NAPI */
++      struct stmmac_channel channel[STMMAC_CH_MAX];
++
+       bool oldlink;
+       int speed;
+       int oldduplex;
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -147,12 +147,14 @@ static void stmmac_verify_args(void)
+ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
+ {
+       u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
++      u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
++      u32 maxq = max(rx_queues_cnt, tx_queues_cnt);
+       u32 queue;
+-      for (queue = 0; queue < rx_queues_cnt; queue++) {
+-              struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
++      for (queue = 0; queue < maxq; queue++) {
++              struct stmmac_channel *ch = &priv->channel[queue];
+-              napi_disable(&rx_q->napi);
++              napi_disable(&ch->napi);
+       }
+ }
+@@ -163,12 +165,14 @@ static void stmmac_disable_all_queues(st
+ static void stmmac_enable_all_queues(struct stmmac_priv *priv)
+ {
+       u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
++      u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
++      u32 maxq = max(rx_queues_cnt, tx_queues_cnt);
+       u32 queue;
+-      for (queue = 0; queue < rx_queues_cnt; queue++) {
+-              struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
++      for (queue = 0; queue < maxq; queue++) {
++              struct stmmac_channel *ch = &priv->channel[queue];
+-              napi_enable(&rx_q->napi);
++              napi_enable(&ch->napi);
+       }
+ }
+@@ -1822,18 +1826,18 @@ static void stmmac_dma_operation_mode(st
+  * @queue: TX queue index
+  * Description: it reclaims the transmit resources after transmission completes.
+  */
+-static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
++static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
+ {
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+       unsigned int bytes_compl = 0, pkts_compl = 0;
+-      unsigned int entry;
++      unsigned int entry, count = 0;
+-      netif_tx_lock(priv->dev);
++      __netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));
+       priv->xstats.tx_clean++;
+       entry = tx_q->dirty_tx;
+-      while (entry != tx_q->cur_tx) {
++      while ((entry != tx_q->cur_tx) && (count < budget)) {
+               struct sk_buff *skb = tx_q->tx_skbuff[entry];
+               struct dma_desc *p;
+               int status;
+@@ -1849,6 +1853,8 @@ static void stmmac_tx_clean(struct stmma
+               if (unlikely(status & tx_dma_own))
+                       break;
++              count++;
++
+               /* Make sure descriptor fields are read after reading
+                * the own bit.
+                */
+@@ -1916,7 +1922,10 @@ static void stmmac_tx_clean(struct stmma
+               stmmac_enable_eee_mode(priv);
+               mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
+       }
+-      netif_tx_unlock(priv->dev);
++
++      __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
++
++      return count;
+ }
+ /**
+@@ -1999,6 +2008,33 @@ static bool stmmac_safety_feat_interrupt
+       return false;
+ }
++static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
++{
++      int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
++                                               &priv->xstats, chan);
++      struct stmmac_channel *ch = &priv->channel[chan];
++      bool needs_work = false;
++
++      if ((status & handle_rx) && ch->has_rx) {
++              needs_work = true;
++      } else {
++              status &= ~handle_rx;
++      }
++
++      if ((status & handle_tx) && ch->has_tx) {
++              needs_work = true;
++      } else {
++              status &= ~handle_tx;
++      }
++
++      if (needs_work && napi_schedule_prep(&ch->napi)) {
++              stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
++              __napi_schedule(&ch->napi);
++      }
++
++      return status;
++}
++
+ /**
+  * stmmac_dma_interrupt - DMA ISR
+  * @priv: driver private structure
+@@ -2013,57 +2049,14 @@ static void stmmac_dma_interrupt(struct
+       u32 channels_to_check = tx_channel_count > rx_channel_count ?
+                               tx_channel_count : rx_channel_count;
+       u32 chan;
+-      bool poll_scheduled = false;
+       int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
+       /* Make sure we never check beyond our status buffer. */
+       if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status)))
+               channels_to_check = ARRAY_SIZE(status);
+-      /* Each DMA channel can be used for rx and tx simultaneously, yet
+-       * napi_struct is embedded in struct stmmac_rx_queue rather than in a
+-       * stmmac_channel struct.
+-       * Because of this, stmmac_poll currently checks (and possibly wakes)
+-       * all tx queues rather than just a single tx queue.
+-       */
+       for (chan = 0; chan < channels_to_check; chan++)
+-              status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr,
+-                              &priv->xstats, chan);
+-
+-      for (chan = 0; chan < rx_channel_count; chan++) {
+-              if (likely(status[chan] & handle_rx)) {
+-                      struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
+-
+-                      if (likely(napi_schedule_prep(&rx_q->napi))) {
+-                              stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+-                              __napi_schedule(&rx_q->napi);
+-                              poll_scheduled = true;
+-                      }
+-              }
+-      }
+-
+-      /* If we scheduled poll, we already know that tx queues will be checked.
+-       * If we didn't schedule poll, see if any DMA channel (used by tx) has a
+-       * completed transmission, if so, call stmmac_poll (once).
+-       */
+-      if (!poll_scheduled) {
+-              for (chan = 0; chan < tx_channel_count; chan++) {
+-                      if (status[chan] & handle_tx) {
+-                              /* It doesn't matter what rx queue we choose
+-                               * here. We use 0 since it always exists.
+-                               */
+-                              struct stmmac_rx_queue *rx_q =
+-                                      &priv->rx_queue[0];
+-
+-                              if (likely(napi_schedule_prep(&rx_q->napi))) {
+-                                      stmmac_disable_dma_irq(priv,
+-                                                      priv->ioaddr, chan);
+-                                      __napi_schedule(&rx_q->napi);
+-                              }
+-                              break;
+-                      }
+-              }
+-      }
++              status[chan] = stmmac_napi_check(priv, chan);
+       for (chan = 0; chan < tx_channel_count; chan++) {
+               if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
+@@ -2211,6 +2204,13 @@ static int stmmac_init_dma_engine(struct
+       return ret;
+ }
++static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
++{
++      struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
++
++      mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer));
++}
++
+ /**
+  * stmmac_tx_timer - mitigation sw timer for tx.
+  * @data: data pointer
+@@ -2219,13 +2219,14 @@ static int stmmac_init_dma_engine(struct
+  */
+ static void stmmac_tx_timer(struct timer_list *t)
+ {
+-      struct stmmac_priv *priv = from_timer(priv, t, txtimer);
+-      u32 tx_queues_count = priv->plat->tx_queues_to_use;
+-      u32 queue;
++      struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer);
++      struct stmmac_priv *priv = tx_q->priv_data;
++      struct stmmac_channel *ch;
+-      /* let's scan all the tx queues */
+-      for (queue = 0; queue < tx_queues_count; queue++)
+-              stmmac_tx_clean(priv, queue);
++      ch = &priv->channel[tx_q->queue_index];
++
++      if (likely(napi_schedule_prep(&ch->napi)))
++              __napi_schedule(&ch->napi);
+ }
+ /**
+@@ -2238,11 +2239,17 @@ static void stmmac_tx_timer(struct timer
+  */
+ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
+ {
++      u32 tx_channel_count = priv->plat->tx_queues_to_use;
++      u32 chan;
++
+       priv->tx_coal_frames = STMMAC_TX_FRAMES;
+       priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
+-      timer_setup(&priv->txtimer, stmmac_tx_timer, 0);
+-      priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
+-      add_timer(&priv->txtimer);
++
++      for (chan = 0; chan < tx_channel_count; chan++) {
++              struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
++
++              timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0);
++      }
+ }
+ static void stmmac_set_rings_length(struct stmmac_priv *priv)
+@@ -2570,6 +2577,7 @@ static void stmmac_hw_teardown(struct ne
+ static int stmmac_open(struct net_device *dev)
+ {
+       struct stmmac_priv *priv = netdev_priv(dev);
++      u32 chan;
+       int ret;
+       stmmac_check_ether_addr(priv);
+@@ -2666,7 +2674,9 @@ irq_error:
+       if (dev->phydev)
+               phy_stop(dev->phydev);
+-      del_timer_sync(&priv->txtimer);
++      for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
++              del_timer_sync(&priv->tx_queue[chan].txtimer);
++
+       stmmac_hw_teardown(dev);
+ init_error:
+       free_dma_desc_resources(priv);
+@@ -2686,6 +2696,7 @@ dma_desc_error:
+ static int stmmac_release(struct net_device *dev)
+ {
+       struct stmmac_priv *priv = netdev_priv(dev);
++      u32 chan;
+       if (priv->eee_enabled)
+               del_timer_sync(&priv->eee_ctrl_timer);
+@@ -2700,7 +2711,8 @@ static int stmmac_release(struct net_dev
+       stmmac_disable_all_queues(priv);
+-      del_timer_sync(&priv->txtimer);
++      for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
++              del_timer_sync(&priv->tx_queue[chan].txtimer);
+       /* Free the IRQ lines */
+       free_irq(dev->irq, dev);
+@@ -2914,14 +2926,13 @@ static netdev_tx_t stmmac_tso_xmit(struc
+       priv->xstats.tx_tso_nfrags += nfrags;
+       /* Manage tx mitigation */
+-      priv->tx_count_frames += nfrags + 1;
+-      if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
+-              mod_timer(&priv->txtimer,
+-                        STMMAC_COAL_TIMER(priv->tx_coal_timer));
+-      } else {
+-              priv->tx_count_frames = 0;
++      tx_q->tx_count_frames += nfrags + 1;
++      if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
+               stmmac_set_tx_ic(priv, desc);
+               priv->xstats.tx_set_ic_bit++;
++              tx_q->tx_count_frames = 0;
++      } else {
++              stmmac_tx_timer_arm(priv, queue);
+       }
+       skb_tx_timestamp(skb);
+@@ -3125,14 +3136,13 @@ static netdev_tx_t stmmac_xmit(struct sk
+        * This approach takes care about the fragments: desc is the first
+        * element in case of no SG.
+        */
+-      priv->tx_count_frames += nfrags + 1;
+-      if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
+-              mod_timer(&priv->txtimer,
+-                        STMMAC_COAL_TIMER(priv->tx_coal_timer));
+-      } else {
+-              priv->tx_count_frames = 0;
++      tx_q->tx_count_frames += nfrags + 1;
++      if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
+               stmmac_set_tx_ic(priv, desc);
+               priv->xstats.tx_set_ic_bit++;
++              tx_q->tx_count_frames = 0;
++      } else {
++              stmmac_tx_timer_arm(priv, queue);
+       }
+       skb_tx_timestamp(skb);
+@@ -3300,6 +3310,7 @@ static inline void stmmac_rx_refill(stru
+ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
+ {
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
++      struct stmmac_channel *ch = &priv->channel[queue];
+       unsigned int entry = rx_q->cur_rx;
+       int coe = priv->hw->rx_csum;
+       unsigned int next_entry;
+@@ -3469,7 +3480,7 @@ static int stmmac_rx(struct stmmac_priv
+                       else
+                               skb->ip_summed = CHECKSUM_UNNECESSARY;
+-                      napi_gro_receive(&rx_q->napi, skb);
++                      napi_gro_receive(&ch->napi, skb);
+                       priv->dev->stats.rx_packets++;
+                       priv->dev->stats.rx_bytes += frame_len;
+@@ -3492,27 +3503,33 @@ static int stmmac_rx(struct stmmac_priv
+  *  Description :
+  *  To look at the incoming frames and clear the tx resources.
+  */
+-static int stmmac_poll(struct napi_struct *napi, int budget)
++static int stmmac_napi_poll(struct napi_struct *napi, int budget)
+ {
+-      struct stmmac_rx_queue *rx_q =
+-              container_of(napi, struct stmmac_rx_queue, napi);
+-      struct stmmac_priv *priv = rx_q->priv_data;
+-      u32 tx_count = priv->plat->tx_queues_to_use;
+-      u32 chan = rx_q->queue_index;
+-      int work_done = 0;
+-      u32 queue;
++      struct stmmac_channel *ch =
++              container_of(napi, struct stmmac_channel, napi);
++      struct stmmac_priv *priv = ch->priv_data;
++      int work_done = 0, work_rem = budget;
++      u32 chan = ch->index;
+       priv->xstats.napi_poll++;
+-      /* check all the queues */
+-      for (queue = 0; queue < tx_count; queue++)
+-              stmmac_tx_clean(priv, queue);
+-
+-      work_done = stmmac_rx(priv, budget, rx_q->queue_index);
+-      if (work_done < budget) {
+-              napi_complete_done(napi, work_done);
+-              stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
++      if (ch->has_tx) {
++              int done = stmmac_tx_clean(priv, work_rem, chan);
++
++              work_done += done;
++              work_rem -= done;
++      }
++
++      if (ch->has_rx) {
++              int done = stmmac_rx(priv, work_rem, chan);
++
++              work_done += done;
++              work_rem -= done;
+       }
++
++      if (work_done < budget && napi_complete_done(napi, work_done))
++              stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
++
+       return work_done;
+ }
+@@ -4172,8 +4189,8 @@ int stmmac_dvr_probe(struct device *devi
+ {
+       struct net_device *ndev = NULL;
+       struct stmmac_priv *priv;
++      u32 queue, maxq;
+       int ret = 0;
+-      u32 queue;
+       ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv),
+                                 MTL_MAX_TX_QUEUES,
+@@ -4293,11 +4310,22 @@ int stmmac_dvr_probe(struct device *devi
+                        "Enable RX Mitigation via HW Watchdog Timer\n");
+       }
+-      for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
+-              struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
++      /* Setup channels NAPI */
++      maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use);
+-              netif_napi_add(ndev, &rx_q->napi, stmmac_poll,
+-                             (8 * priv->plat->rx_queues_to_use));
++      for (queue = 0; queue < maxq; queue++) {
++              struct stmmac_channel *ch = &priv->channel[queue];
++
++              ch->priv_data = priv;
++              ch->index = queue;
++
++              if (queue < priv->plat->rx_queues_to_use)
++                      ch->has_rx = true;
++              if (queue < priv->plat->tx_queues_to_use)
++                      ch->has_tx = true;
++
++              netif_napi_add(ndev, &ch->napi, stmmac_napi_poll,
++                             NAPI_POLL_WEIGHT);
+       }
+       mutex_init(&priv->lock);
+@@ -4343,10 +4371,10 @@ error_netdev_register:
+           priv->hw->pcs != STMMAC_PCS_RTBI)
+               stmmac_mdio_unregister(ndev);
+ error_mdio_register:
+-      for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
+-              struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
++      for (queue = 0; queue < maxq; queue++) {
++              struct stmmac_channel *ch = &priv->channel[queue];
+-              netif_napi_del(&rx_q->napi);
++              netif_napi_del(&ch->napi);
+       }
+ error_hw_init:
+       destroy_workqueue(priv->wq);
+--- a/include/linux/stmmac.h
++++ b/include/linux/stmmac.h
+@@ -30,6 +30,7 @@
+ #define MTL_MAX_RX_QUEUES     8
+ #define MTL_MAX_TX_QUEUES     8
++#define STMMAC_CH_MAX         8
+ #define STMMAC_RX_COE_NONE    0
+ #define STMMAC_RX_COE_TYPE1   1
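The core of the rework above is moving from one global TX timer to one timer
per TX queue, with the callback recovering its own queue via from_timer(). A
condensed sketch of that pattern, using hypothetical structure and function
names rather than the driver's full code:

#include <linux/timer.h>
#include <linux/jiffies.h>

struct my_tx_queue {
	struct timer_list txtimer;	/* one timer per queue */
	u32 queue_index;
};

static void my_tx_timer(struct timer_list *t)
{
	struct my_tx_queue *txq = from_timer(txq, t, txtimer);

	/* Kick NAPI / clean only this queue's descriptors. */
	(void)txq->queue_index;
}

static void my_init_tx_timers(struct my_tx_queue *queues, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		timer_setup(&queues[i].txtimer, my_tx_timer, 0);
}

static void my_arm_tx_timer(struct my_tx_queue *txq, unsigned int usecs)
{
	mod_timer(&txq->txtimer, jiffies + usecs_to_jiffies(usecs));
}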
diff --git a/queue-4.18/net-systemport-fix-wake-up-interrupt-race-during-resume.patch b/queue-4.18/net-systemport-fix-wake-up-interrupt-race-during-resume.patch
new file mode 100644 (file)
index 0000000..c7cfcb2
--- /dev/null
@@ -0,0 +1,90 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 2 Oct 2018 16:52:03 -0700
+Subject: net: systemport: Fix wake-up interrupt race during resume
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 45ec318578c0c22a11f5b9927d064418e1ab1905 ]
+
+The AON_PM_L2 is normally used to trigger and identify the source of a
+wake-up event. Since the RX_SYS clock is no longer turned off, we also
+have an interrupt being sent to the SYSTEMPORT INTRL2_0 controller, and
+that interrupt remains active up until the magic packet detector is
+disabled, which happens much later during driver resumption.
+
+The race happens if we have a CPU that is entering the SYSTEMPORT
+INTRL2_0 handler during resume, and another CPU has managed to clear the
+wake-up interrupt during bcm_sysport_resume_from_wol(). In that case, we
+have the first CPU stuck in the interrupt handler with an interrupt
+cause that has been cleared under its feet, and so we keep returning
+IRQ_NONE and we never make any progress.
+
+This was not a problem before because we would always turn off the
+RX_SYS clock during WoL, so the SYSTEMPORT INTRL2_0 would be turned off
+as well, thus not latching the interrupt.
+
+The fix is to make sure we do not enable either the MPD or
+BRCM_TAG_MATCH interrupts since those are redundant with what the
+AON_PM_L2 interrupt controller already processes and they would cause
+such a race to occur.
+
+Fixes: bb9051a2b230 ("net: systemport: Add support for WAKE_FILTER")
+Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bcmsysport.c |   22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bcmsysport.c
++++ b/drivers/net/ethernet/broadcom/bcmsysport.c
+@@ -1045,14 +1045,22 @@ static void bcm_sysport_resume_from_wol(
+ {
+       u32 reg;
+-      /* Stop monitoring MPD interrupt */
+-      intrl2_0_mask_set(priv, INTRL2_0_MPD);
+-
+       /* Clear the MagicPacket detection logic */
+       reg = umac_readl(priv, UMAC_MPD_CTRL);
+       reg &= ~MPD_EN;
+       umac_writel(priv, reg, UMAC_MPD_CTRL);
++      reg = intrl2_0_readl(priv, INTRL2_CPU_STATUS);
++      if (reg & INTRL2_0_MPD)
++              netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n");
++
++      if (reg & INTRL2_0_BRCM_MATCH_TAG) {
++              reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) &
++                                RXCHK_BRCM_TAG_MATCH_MASK;
++              netdev_info(priv->netdev,
++                          "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg);
++      }
++
+       netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n");
+ }
+@@ -1102,11 +1110,6 @@ static irqreturn_t bcm_sysport_rx_isr(in
+       if (priv->irq0_stat & INTRL2_0_TX_RING_FULL)
+               bcm_sysport_tx_reclaim_all(priv);
+-      if (priv->irq0_stat & INTRL2_0_MPD) {
+-              netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n");
+-              bcm_sysport_resume_from_wol(priv);
+-      }
+-
+       if (!priv->is_lite)
+               goto out;
+@@ -2459,9 +2462,6 @@ static int bcm_sysport_suspend_to_wol(st
+       /* UniMAC receive needs to be turned on */
+       umac_enable_set(priv, CMD_RX_EN, 1);
+-      /* Enable the interrupt wake-up source */
+-      intrl2_0_mask_clear(priv, INTRL2_0_MPD);
+-
+       netif_dbg(priv, wol, ndev, "entered WOL mode\n");
+       return 0;
diff --git a/queue-4.18/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch b/queue-4.18/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch
new file mode 100644 (file)
index 0000000..0124cd6
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Yu Zhao <yuzhao@google.com>
+Date: Fri, 28 Sep 2018 17:04:30 -0600
+Subject: net/usb: cancel pending work when unbinding smsc75xx
+
+From: Yu Zhao <yuzhao@google.com>
+
+[ Upstream commit f7b2a56e1f3dcbdb4cf09b2b63e859ffe0e09df8 ]
+
+Cancel pending work before freeing the smsc75xx private data structure
+during unbind. This fixes the following crash in the driver:
+
+BUG: unable to handle kernel NULL pointer dereference at 0000000000000050
+IP: mutex_lock+0x2b/0x3f
+<snipped>
+Workqueue: events smsc75xx_deferred_multicast_write [smsc75xx]
+task: ffff8caa83e85700 task.stack: ffff948b80518000
+RIP: 0010:mutex_lock+0x2b/0x3f
+<snipped>
+Call Trace:
+ smsc75xx_deferred_multicast_write+0x40/0x1af [smsc75xx]
+ process_one_work+0x18d/0x2fc
+ worker_thread+0x1a2/0x269
+ ? pr_cont_work+0x58/0x58
+ kthread+0xfa/0x10a
+ ? pr_cont_work+0x58/0x58
+ ? rcu_read_unlock_sched_notrace+0x48/0x48
+ ret_from_fork+0x22/0x40
+
+Signed-off-by: Yu Zhao <yuzhao@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/smsc75xx.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/usb/smsc75xx.c
++++ b/drivers/net/usb/smsc75xx.c
+@@ -1517,6 +1517,7 @@ static void smsc75xx_unbind(struct usbne
+ {
+       struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
+       if (pdata) {
++              cancel_work_sync(&pdata->set_multicast);
+               netif_dbg(dev, ifdown, dev->net, "free pdata\n");
+               kfree(pdata);
+               pdata = NULL;
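The one-line fix above is an instance of a general teardown rule: when a
structure embeds a work_struct, any pending or running work must be cancelled
before the structure is freed. A minimal sketch of that pattern with
hypothetical names:

#include <linux/workqueue.h>
#include <linux/slab.h>

struct my_priv {
	struct work_struct set_multicast;
	/* ... other fields the work callback dereferences ... */
};

static void my_unbind(struct my_priv *pdata)
{
	if (!pdata)
		return;

	/* Wait for any queued or running work item that uses pdata
	 * before the memory backing it goes away.
	 */
	cancel_work_sync(&pdata->set_multicast);
	kfree(pdata);
}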
diff --git a/queue-4.18/netlabel-check-for-ipv4mask-in-addrinfo_get.patch b/queue-4.18/netlabel-check-for-ipv4mask-in-addrinfo_get.patch
new file mode 100644 (file)
index 0000000..9b76f51
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Sean Tranchetti <stranche@codeaurora.org>
+Date: Thu, 20 Sep 2018 14:29:45 -0600
+Subject: netlabel: check for IPV4MASK in addrinfo_get
+
+From: Sean Tranchetti <stranche@codeaurora.org>
+
+[ Upstream commit f88b4c01b97e09535505cf3c327fdbce55c27f00 ]
+
+netlbl_unlabel_addrinfo_get() assumes that if it finds the
+NLBL_UNLABEL_A_IPV4ADDR attribute, it must also have the
+NLBL_UNLABEL_A_IPV4MASK attribute as well. However, this is
+not necessarily the case as the current checks in
+netlbl_unlabel_staticadd() and friends are not sufficient to
+enforce this.
+
+If passed a netlink message with NLBL_UNLABEL_A_IPV4ADDR,
+NLBL_UNLABEL_A_IPV6ADDR, and NLBL_UNLABEL_A_IPV6MASK attributes,
+these functions will all call netlbl_unlabel_addrinfo_get() which
+will then attempt to dereference NULL when fetching the non-existent
+NLBL_UNLABEL_A_IPV4MASK attribute:
+
+Unable to handle kernel NULL pointer dereference at virtual address 0
+Process unlab (pid: 31762, stack limit = 0xffffff80502d8000)
+Call trace:
+       netlbl_unlabel_addrinfo_get+0x44/0xd8
+       netlbl_unlabel_staticremovedef+0x98/0xe0
+       genl_rcv_msg+0x354/0x388
+       netlink_rcv_skb+0xac/0x118
+       genl_rcv+0x34/0x48
+       netlink_unicast+0x158/0x1f0
+       netlink_sendmsg+0x32c/0x338
+       sock_sendmsg+0x44/0x60
+       ___sys_sendmsg+0x1d0/0x2a8
+       __sys_sendmsg+0x64/0xb4
+       SyS_sendmsg+0x34/0x4c
+       el0_svc_naked+0x34/0x38
+Code: 51001149 7100113f 540000a0 f9401508 (79400108)
+---[ end trace f6438a488e737143 ]---
+Kernel panic - not syncing: Fatal exception
+
+Signed-off-by: Sean Tranchetti <stranche@codeaurora.org>
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlabel/netlabel_unlabeled.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/netlabel/netlabel_unlabeled.c
++++ b/net/netlabel/netlabel_unlabeled.c
+@@ -781,7 +781,8 @@ static int netlbl_unlabel_addrinfo_get(s
+ {
+       u32 addr_len;
+-      if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) {
++      if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] &&
++          info->attrs[NLBL_UNLABEL_A_IPV4MASK]) {
+               addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]);
+               if (addr_len != sizeof(struct in_addr) &&
+                   addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK]))
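The underlying rule in the fix above: when two generic netlink attributes are
only meaningful as a pair, check that both are present before dereferencing
either. A hedged sketch with invented attribute IDs (not netlabel's real
enum):

#include <net/genetlink.h>
#include <net/netlink.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/string.h>

enum { MY_A_UNSPEC, MY_A_V4ADDR, MY_A_V4MASK };

static int my_get_v4(struct genl_info *info,
		     struct in_addr *addr, struct in_addr *mask)
{
	/* Seeing MY_A_V4ADDR alone must not imply MY_A_V4MASK exists. */
	if (!info->attrs[MY_A_V4ADDR] || !info->attrs[MY_A_V4MASK])
		return -EINVAL;

	if (nla_len(info->attrs[MY_A_V4ADDR]) != sizeof(*addr) ||
	    nla_len(info->attrs[MY_A_V4MASK]) != sizeof(*mask))
		return -EINVAL;

	memcpy(addr, nla_data(info->attrs[MY_A_V4ADDR]), sizeof(*addr));
	memcpy(mask, nla_data(info->attrs[MY_A_V4MASK]), sizeof(*mask));
	return 0;
}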
diff --git a/queue-4.18/nfp-avoid-soft-lockups-under-control-message-storm.patch b/queue-4.18/nfp-avoid-soft-lockups-under-control-message-storm.patch
new file mode 100644 (file)
index 0000000..3ea5bb7
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Tue, 2 Oct 2018 10:10:14 -0700
+Subject: nfp: avoid soft lockups under control message storm
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+[ Upstream commit ff58e2df62ce29d0552278c290ae494b30fe0c6f ]
+
+When the FW floods the driver with control messages, try to exit the cmsg
+processing loop every now and then to avoid soft lockups.  Cmsg
+processing is generally very lightweight so 512 seems like a reasonable
+budget, which should not be exceeded under normal conditions.
+
+Fixes: 77ece8d5f196 ("nfp: add control vNIC datapath")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Reviewed-by: Simon Horman <simon.horman@netronome.com>
+Tested-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/netronome/nfp/nfp_net_common.c |   17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+@@ -2068,14 +2068,17 @@ nfp_ctrl_rx_one(struct nfp_net *nn, stru
+       return true;
+ }
+-static void nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
++static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
+ {
+       struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
+       struct nfp_net *nn = r_vec->nfp_net;
+       struct nfp_net_dp *dp = &nn->dp;
++      unsigned int budget = 512;
+-      while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring))
++      while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
+               continue;
++
++      return budget;
+ }
+ static void nfp_ctrl_poll(unsigned long arg)
+@@ -2087,9 +2090,13 @@ static void nfp_ctrl_poll(unsigned long
+       __nfp_ctrl_tx_queued(r_vec);
+       spin_unlock_bh(&r_vec->lock);
+-      nfp_ctrl_rx(r_vec);
+-
+-      nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
++      if (nfp_ctrl_rx(r_vec)) {
++              nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
++      } else {
++              tasklet_schedule(&r_vec->tasklet);
++              nn_dp_warn(&r_vec->nfp_net->dp,
++                         "control message budget exceeded!\n");
++      }
+ }
+ /* Setup and Configuration
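The control-message budget above follows a common bounded-polling idea:
process at most N items per invocation, and if the budget runs out,
reschedule yourself instead of monopolizing the CPU. A simplified, runnable
userspace model of that idea (not the exact driver loop):

#include <stdbool.h>
#include <stdio.h>

static int pending = 2000;		/* simulated backlog of control messages */

static bool process_one(void)		/* true while more work remains */
{
	if (!pending)
		return false;
	pending--;
	return true;
}

static unsigned int poll_once(void)
{
	unsigned int budget = 512;

	while (budget && process_one())
		budget--;

	return budget;			/* 0: budget spent, come back later */
}

int main(void)
{
	while (poll_once() == 0)
		printf("budget exhausted, rescheduling (pending=%d)\n", pending);
	printf("backlog drained, re-enable the interrupt\n");
	return 0;
}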
diff --git a/queue-4.18/qed-fix-shmem-structure-inconsistency-between-driver-and-the-mfw.patch b/queue-4.18/qed-fix-shmem-structure-inconsistency-between-driver-and-the-mfw.patch
new file mode 100644 (file)
index 0000000..6a721d4
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
+Date: Thu, 27 Sep 2018 04:12:10 -0700
+Subject: qed: Fix shmem structure inconsistency between driver and the mfw.
+
+From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
+
+[ Upstream commit 5f672090e44f4951084c5e1d6b0668a5fc422af8 ]
+
+The driver and the management FW (mfw) copies of the shared structure
+differ in size. This leads to issues when the driver tries to access
+structure members that are not aligned with the mfw copy, e.g. the
+data_ptr usage in the case of an mfw_tlv request.
+Align the driver structure with the mfw copy and add reserved field(s)
+to the driver structure for the members not used by the driver.
+
+Fixes: dd006921d67f ("qed: Add MFW interfaces for TLV request support.")
+Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
+Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_hsi.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+@@ -11987,6 +11987,7 @@ struct public_global {
+       u32 running_bundle_id;
+       s32 external_temperature;
+       u32 mdump_reason;
++      u64 reserved;
+       u32 data_ptr;
+       u32 data_size;
+ };
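The qed fix above is about keeping two views of one shared memory layout in
sync: if the driver's definition omits a member the mfw has, every later
member (such as data_ptr) is read from the wrong offset until a same-sized
reserved placeholder is added. A self-contained illustration with made-up
member names and sizes:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct shmem_fw {		/* layout as the management FW defines it */
	uint32_t mdump_reason;
	uint64_t fw_only;	/* member the driver does not use */
	uint32_t data_ptr;
};

struct shmem_drv_old {		/* driver view missing the unused member */
	uint32_t mdump_reason;
	uint32_t data_ptr;	/* lands at the wrong offset */
};

struct shmem_drv_new {		/* driver view with a reserved placeholder */
	uint32_t mdump_reason;
	uint64_t reserved;
	uint32_t data_ptr;
};

int main(void)
{
	printf("fw  data_ptr offset: %zu\n", offsetof(struct shmem_fw, data_ptr));
	printf("old drv     offset: %zu\n", offsetof(struct shmem_drv_old, data_ptr));
	printf("new drv     offset: %zu\n", offsetof(struct shmem_drv_new, data_ptr));
	return 0;
}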
diff --git a/queue-4.18/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch b/queue-4.18/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch
new file mode 100644 (file)
index 0000000..1a09552
--- /dev/null
@@ -0,0 +1,149 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Shahed Shaikh <shahed.shaikh@cavium.com>
+Date: Wed, 26 Sep 2018 12:41:10 -0700
+Subject: qlcnic: fix Tx descriptor corruption on 82xx devices
+
+From: Shahed Shaikh <shahed.shaikh@cavium.com>
+
+[ Upstream commit c333fa0c4f220f8f7ea5acd6b0ebf3bf13fd684d ]
+
+In the regular NIC transmission flow, the driver always configures the
+MAC using a Tx queue zero descriptor as part of the MAC learning flow.
+But on a NIC with multiple Tx queues, regular transmission can occur on
+any non-zero Tx queue, and from that context the driver still uses a
+Tx queue zero descriptor to configure the MAC. At the same time, Tx
+queue zero could be used by another CPU for regular transmission, which
+could lead to Tx queue zero descriptor corruption and cause an FW abort.
+
+This patch fixes this by making the driver always configure the learned
+MAC address from the same Tx queue that is used for regular transmission.
+
+Fixes: 7e2cf4feba05 ("qlcnic: change driver hardware interface mechanism")
+Signed-off-by: Shahed Shaikh <shahed.shaikh@cavium.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic.h         |    8 +++++---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c |    3 ++-
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h |    3 ++-
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h      |    3 ++-
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c      |   12 ++++++------
+ 5 files changed, 17 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+@@ -1800,7 +1800,8 @@ struct qlcnic_hardware_ops {
+       int (*config_loopback) (struct qlcnic_adapter *, u8);
+       int (*clear_loopback) (struct qlcnic_adapter *, u8);
+       int (*config_promisc_mode) (struct qlcnic_adapter *, u32);
+-      void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16);
++      void (*change_l2_filter)(struct qlcnic_adapter *adapter, u64 *addr,
++                               u16 vlan, struct qlcnic_host_tx_ring *tx_ring);
+       int (*get_board_info) (struct qlcnic_adapter *);
+       void (*set_mac_filter_count) (struct qlcnic_adapter *);
+       void (*free_mac_list) (struct qlcnic_adapter *);
+@@ -2064,9 +2065,10 @@ static inline int qlcnic_nic_set_promisc
+ }
+ static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter,
+-                                      u64 *addr, u16 id)
++                                      u64 *addr, u16 vlan,
++                                      struct qlcnic_host_tx_ring *tx_ring)
+ {
+-      adapter->ahw->hw_ops->change_l2_filter(adapter, addr, id);
++      adapter->ahw->hw_ops->change_l2_filter(adapter, addr, vlan, tx_ring);
+ }
+ static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter)
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+@@ -2135,7 +2135,8 @@ out:
+ }
+ void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr,
+-                                u16 vlan_id)
++                                u16 vlan_id,
++                                struct qlcnic_host_tx_ring *tx_ring)
+ {
+       u8 mac[ETH_ALEN];
+       memcpy(&mac, addr, ETH_ALEN);
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
+@@ -550,7 +550,8 @@ int qlcnic_83xx_wrt_reg_indirect(struct
+ int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32);
+ int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int);
+ int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int);
+-void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16);
++void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr,
++                                u16 vlan, struct qlcnic_host_tx_ring *ring);
+ int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *);
+ int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *);
+ void qlcnic_83xx_initialize_nic(struct qlcnic_adapter *, int);
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
+@@ -173,7 +173,8 @@ int qlcnic_82xx_napi_add(struct qlcnic_a
+                        struct net_device *netdev);
+ void qlcnic_82xx_get_beacon_state(struct qlcnic_adapter *);
+ void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter,
+-                             u64 *uaddr, u16 vlan_id);
++                             u64 *uaddr, u16 vlan_id,
++                             struct qlcnic_host_tx_ring *tx_ring);
+ int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *,
+                                    struct ethtool_coalesce *);
+ int qlcnic_82xx_set_rx_coalesce(struct qlcnic_adapter *);
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+@@ -268,13 +268,12 @@ static void qlcnic_add_lb_filter(struct
+ }
+ void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr,
+-                             u16 vlan_id)
++                             u16 vlan_id, struct qlcnic_host_tx_ring *tx_ring)
+ {
+       struct cmd_desc_type0 *hwdesc;
+       struct qlcnic_nic_req *req;
+       struct qlcnic_mac_req *mac_req;
+       struct qlcnic_vlan_req *vlan_req;
+-      struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring;
+       u32 producer;
+       u64 word;
+@@ -301,7 +300,8 @@ void qlcnic_82xx_change_filter(struct ql
+ static void qlcnic_send_filter(struct qlcnic_adapter *adapter,
+                              struct cmd_desc_type0 *first_desc,
+-                             struct sk_buff *skb)
++                             struct sk_buff *skb,
++                             struct qlcnic_host_tx_ring *tx_ring)
+ {
+       struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data);
+       struct ethhdr *phdr = (struct ethhdr *)(skb->data);
+@@ -335,7 +335,7 @@ static void qlcnic_send_filter(struct ql
+                   tmp_fil->vlan_id == vlan_id) {
+                       if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime))
+                               qlcnic_change_filter(adapter, &src_addr,
+-                                                   vlan_id);
++                                                   vlan_id, tx_ring);
+                       tmp_fil->ftime = jiffies;
+                       return;
+               }
+@@ -350,7 +350,7 @@ static void qlcnic_send_filter(struct ql
+       if (!fil)
+               return;
+-      qlcnic_change_filter(adapter, &src_addr, vlan_id);
++      qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring);
+       fil->ftime = jiffies;
+       fil->vlan_id = vlan_id;
+       memcpy(fil->faddr, &src_addr, ETH_ALEN);
+@@ -766,7 +766,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_
+       }
+       if (adapter->drv_mac_learn)
+-              qlcnic_send_filter(adapter, first_desc, skb);
++              qlcnic_send_filter(adapter, first_desc, skb, tx_ring);
+       tx_ring->tx_stats.tx_bytes += skb->len;
+       tx_ring->tx_stats.xmit_called++;
diff --git a/queue-4.18/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch b/queue-4.18/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch
new file mode 100644
index 0000000..476e94e
--- /dev/null
@@ -0,0 +1,30 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Giacinto Cifelli <gciofono@gmail.com>
+Date: Wed, 10 Oct 2018 20:05:53 +0200
+Subject: qmi_wwan: Added support for Gemalto's Cinterion ALASxx WWAN interface
+
+From: Giacinto Cifelli <gciofono@gmail.com>
+
+[ Upstream commit 4f7617705bfff84d756fe4401a1f4f032f374984 ]
+
+Added support for Gemalto's Cinterion ALASxx WWAN interfaces
+by adding QMI_FIXED_INTF with Cinterion's VID and PID.
+
+Signed-off-by: Giacinto Cifelli <gciofono@gmail.com>
+Acked-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -1234,6 +1234,7 @@ static const struct usb_device_id produc
+       {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)},    /* Olivetti Olicard 500 */
+       {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)},    /* Cinterion PLxx */
+       {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)},    /* Cinterion PHxx,PXxx */
++      {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)},   /* Cinterion ALASxx (1 RmNet) */
+       {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)},    /* Cinterion PHxx,PXxx (2 RmNet) */
+       {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)},    /* Cinterion PHxx,PXxx (2 RmNet) */
+       {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)},    /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/
diff --git a/queue-4.18/r8169-fix-network-stalls-due-to-missing-bit-txcfg_auto_fifo.patch b/queue-4.18/r8169-fix-network-stalls-due-to-missing-bit-txcfg_auto_fifo.patch
new file mode 100644
index 0000000..7ab5680
--- /dev/null
@@ -0,0 +1,104 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Heiner Kallweit <hkallweit1@gmail.com>
+Date: Fri, 28 Sep 2018 23:51:54 +0200
+Subject: r8169: fix network stalls due to missing bit TXCFG_AUTO_FIFO
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+[ Upstream commit ad5f97faff4231e72b96bd96adbe1b6e977a9b86 ]
+
+Some of the chip-specific hw_start functions set bit TXCFG_AUTO_FIFO
+in register TxConfig. The original patch changed the order of some
+calls, resulting in these changes being overwritten by
+rtl_set_tx_config_registers() in rtl_hw_start(). This eventually
+resulted in network stalls, especially under high load.
+
+Analyzing the chip-specific hw_start functions shows that all chip
+versions from 34 onward, with the exception of version 39, need this
+bit set. This patch therefore moves setting the bit into
+rtl_set_tx_config_registers().
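+
+As a rough standalone illustration of the version gate described above (the
+numeric versions stand in for the RTL_GIGA_MAC_VER_* enum values, and the bit
+position of TXCFG_AUTO_FIFO is assumed here purely for the example):
+
+    /* gcc -o txcfg_gate txcfg_gate.c && ./txcfg_gate */
+    #include <stdio.h>
+    #include <stdint.h>
+
+    #define EXAMPLE_TXCFG_AUTO_FIFO (1u << 7)   /* assumed bit, illustration only */
+
+    static uint32_t example_tx_config(unsigned int mac_version, uint32_t val)
+    {
+            /* all chip versions from 34 onward, except 39, want AUTO_FIFO */
+            if (mac_version >= 34 && mac_version != 39)
+                    val |= EXAMPLE_TXCFG_AUTO_FIFO;
+            return val;
+    }
+
+    int main(void)
+    {
+            printf("ver 33: %#x\n", example_tx_config(33, 0));   /* 0    */
+            printf("ver 39: %#x\n", example_tx_config(39, 0));   /* 0    */
+            printf("ver 40: %#x\n", example_tx_config(40, 0));   /* 0x80 */
+            return 0;
+    }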
+
+Fixes: 4fd48c4ac0a0 ("r8169: move common initializations to tp->hw_start")
+Reported-by: Ortwin Glück <odi@odi.ch>
+Reported-by: David Arendt <admin@prnet.org>
+Root-caused-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
+Tested-by: Tony Atkinson <tatkinson@linux.com>
+Tested-by: David Arendt <admin@prnet.org>
+Tested-by: Ortwin Glück <odi@odi.ch>
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169.c |   20 ++++++++------------
+ 1 file changed, 8 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/realtek/r8169.c
++++ b/drivers/net/ethernet/realtek/r8169.c
+@@ -5041,9 +5041,14 @@ static void rtl8169_hw_reset(struct rtl8
+ static void rtl_set_tx_config_registers(struct rtl8169_private *tp)
+ {
+-      /* Set DMA burst size and Interframe Gap Time */
+-      RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) |
+-              (InterFrameGap << TxInterFrameGapShift));
++      u32 val = TX_DMA_BURST << TxDMAShift |
++                InterFrameGap << TxInterFrameGapShift;
++
++      if (tp->mac_version >= RTL_GIGA_MAC_VER_34 &&
++          tp->mac_version != RTL_GIGA_MAC_VER_39)
++              val |= TXCFG_AUTO_FIFO;
++
++      RTL_W32(tp, TxConfig, val);
+ }
+ static void rtl_set_rx_max_size(struct rtl8169_private *tp)
+@@ -5530,7 +5535,6 @@ static void rtl_hw_start_8168e_2(struct
+       rtl_disable_clock_request(tp);
+-      RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+       /* Adjust EEE LED frequency */
+@@ -5562,7 +5566,6 @@ static void rtl_hw_start_8168f(struct rt
+       rtl_disable_clock_request(tp);
+-      RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+@@ -5607,8 +5610,6 @@ static void rtl_hw_start_8411(struct rtl
+ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
+ {
+-      RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+-
+       rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC);
+       rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
+       rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC);
+@@ -5707,8 +5708,6 @@ static void rtl_hw_start_8168h_1(struct
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+       rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));
+-      RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+-
+       rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
+       rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
+       rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC);
+@@ -5789,8 +5788,6 @@ static void rtl_hw_start_8168ep(struct r
+ {
+       rtl8168ep_stop_cmac(tp);
+-      RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+-
+       rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
+       rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC);
+       rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x5f, ERIAR_EXGMAC);
+@@ -6108,7 +6105,6 @@ static void rtl_hw_start_8402(struct rtl
+       /* Force LAN exit from ASPM if Rx/Tx are not idle */
+       RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
+-      RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+       rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
diff --git a/queue-4.18/r8169-set-rx_multi_en-bit-in-rxconfig-for-8168f-family-chips.patch b/queue-4.18/r8169-set-rx_multi_en-bit-in-rxconfig-for-8168f-family-chips.patch
new file mode 100644
index 0000000..142357a
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: "Maciej S. Szmigiero" <mail@maciej.szmigiero.name>
+Date: Thu, 11 Oct 2018 16:02:10 +0200
+Subject: r8169: set RX_MULTI_EN bit in RxConfig for 8168F-family chips
+
+From: "Maciej S. Szmigiero" <mail@maciej.szmigiero.name>
+
+[ Upstream commit 511cfd580f23b0e0fcd5659931ef14c6e2c062b0 ]
+
+It has been reported that since
+commit 05212ba8132b42 ("r8169: set RxConfig after tx/rx is enabled for RTL8169sb/8110sb devices")
+at least RTL_GIGA_MAC_VER_38 NICs work erratically after a resume from
+suspend.
+The problem has been traced to a missing RX_MULTI_EN bit in the RxConfig
+register.
+We already set this bit for RTL_GIGA_MAC_VER_35 NICs of the same 8168F
+chip family, so let's also set it for its other siblings: RTL_GIGA_MAC_VER_36
+and RTL_GIGA_MAC_VER_38.
+
+Curiously, the NIC seems to work fine after a system boot without having
+this bit set as long as the system isn't suspended and resumed.
+
+Fixes: 05212ba8132b42 ("r8169: set RxConfig after tx/rx is enabled for RTL8169sb/8110sb devices")
+Reported-by: Chris Clayton <chris2553@googlemail.com>
+Signed-off-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
+Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
+Tested-by: Chris Clayton <chris2553@googlemail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/realtek/r8169.c
++++ b/drivers/net/ethernet/realtek/r8169.c
+@@ -4788,8 +4788,8 @@ static void rtl_init_rxcfg(struct rtl816
+               RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
+               break;
+       case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24:
+-      case RTL_GIGA_MAC_VER_34:
+-      case RTL_GIGA_MAC_VER_35:
++      case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_36:
++      case RTL_GIGA_MAC_VER_38:
+               RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
+               break;
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
diff --git a/queue-4.18/rtnetlink-fail-dump-if-target-netnsid-is-invalid.patch b/queue-4.18/rtnetlink-fail-dump-if-target-netnsid-is-invalid.patch
new file mode 100644
index 0000000..0fc861c
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: David Ahern <dsahern@gmail.com>
+Date: Fri, 28 Sep 2018 12:28:41 -0700
+Subject: rtnetlink: Fail dump if target netnsid is invalid
+
+From: David Ahern <dsahern@gmail.com>
+
+[ Upstream commit 893626d6a353d1356528f94e081246ecf233d77a ]
+
+Link dumps can return results from a target namespace. If the namespace id
+is invalid and get_target_net fails, the dump request should fail rather
+than continue with a dump of the current namespace.
+
+Fixes: 79e1ad148c844 ("rtnetlink: use netnsid to query interface")
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1848,10 +1848,8 @@ static int rtnl_dump_ifinfo(struct sk_bu
+               if (tb[IFLA_IF_NETNSID]) {
+                       netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+                       tgt_net = get_target_net(skb->sk, netnsid);
+-                      if (IS_ERR(tgt_net)) {
+-                              tgt_net = net;
+-                              netnsid = -1;
+-                      }
++                      if (IS_ERR(tgt_net))
++                              return PTR_ERR(tgt_net);
+               }
+               if (tb[IFLA_EXT_MASK])
diff --git a/queue-4.18/rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch b/queue-4.18/rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch
new file mode 100644
index 0000000..a1ccf01
--- /dev/null
@@ -0,0 +1,167 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Mauricio Faria de Oliveira <mfo@canonical.com>
+Date: Mon, 1 Oct 2018 22:46:40 -0300
+Subject: rtnetlink: fix rtnl_fdb_dump() for ndmsg header
+
+From: Mauricio Faria de Oliveira <mfo@canonical.com>
+
+[ Upstream commit bd961c9bc66497f0c63f4ba1d02900bb85078366 ]
+
+Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg',
+which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh').
+
+The problem is that the function bails out early if nlmsg_parse() fails,
+which does occur for iproute2's usage of 'struct ndmsg' because the payload
+length is shorter than the assumed family header ('struct ifinfomsg') alone.
+
+This breaks backward compatibility with userspace -- nothing is sent back.
+
+Some examples with iproute2 and netlink library for go [1]:
+
+ 1) $ bridge fdb show
+    33:33:00:00:00:01 dev ens3 self permanent
+    01:00:5e:00:00:01 dev ens3 self permanent
+    33:33:ff:15:98:30 dev ens3 self permanent
+
+      This one works, as it uses 'struct ifinfomsg'.
+
+      fdb_show() @ iproute2/bridge/fdb.c
+        """
+        .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+        ...
+        if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...]
+        """
+
+ 2) $ ip --family bridge neigh
+    RTNETLINK answers: Invalid argument
+    Dump terminated
+
+      This one fails, as it uses 'struct ndmsg'.
+
+      do_show_or_flush() @ iproute2/ip/ipneigh.c
+        """
+        .n.nlmsg_type = RTM_GETNEIGH,
+        .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
+        """
+
+ 3) $ ./neighlist
+    < no output >
+
+      This one fails, as it uses a 'struct ndmsg'-based request.
+
+      neighList() @ netlink/neigh_linux.go
+        """
+        req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...]
+        msg := Ndmsg{
+        """
+
+The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink:
+bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails
+if the payload length (with the _actual_ family header) is less than the
+family header length alone (which is assumed, in parameter 'hdrlen').
+This is true in the examples above with struct ndmsg, with size and payload
+length shorter than struct ifinfomsg.
+
+However, that commit just intends to fix something under the assumption the
+family header is indeed a 'struct ifinfomsg' - by preventing access to the
+payload as such (via 'ifm' pointer) if the payload length is not sufficient
+to actually contain it.
+
+The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump
+interface at par with brctl"), to support iproute2's 'bridge fdb' command
+(not 'ip neigh'), which indeed uses 'struct ifinfomsg' and thus is not broken.
+
+So, in order to unbreak the 'struct ndmsg' family headers and still allow
+'struct ifinfomsg' to continue to work, check for the known message sizes
+used with 'struct ndmsg' in iproute2 (with zero or one attribute, which is
+not used in this function anyway), and in that case do not parse the data
+as ifinfomsg.
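+
+To see why nlmsg_parse() trips over the ndmsg-based requests, the two header
+sizes can be compared with a small userspace program (an illustration only,
+assuming the Linux UAPI headers are installed):
+
+    /* gcc -o hdrsize hdrsize.c && ./hdrsize */
+    #include <stdio.h>
+    #include <linux/rtnetlink.h>   /* struct ifinfomsg */
+    #include <linux/neighbour.h>   /* struct ndmsg */
+
+    int main(void)
+    {
+            /* ndmsg is shorter than ifinfomsg (12 vs 16 bytes), so a dump
+             * request carrying only ndmsg fails a parse that assumes
+             * sizeof(struct ifinfomsg) as the family header length. */
+            printf("sizeof(struct ndmsg)     = %zu\n", sizeof(struct ndmsg));
+            printf("sizeof(struct ifinfomsg) = %zu\n", sizeof(struct ifinfomsg));
+            return 0;
+    }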
+
+Same examples with this patch applied (or revert/before the original fix):
+
+    $ bridge fdb show
+    33:33:00:00:00:01 dev ens3 self permanent
+    01:00:5e:00:00:01 dev ens3 self permanent
+    33:33:ff:15:98:30 dev ens3 self permanent
+
+    $ ip --family bridge neigh
+    dev ens3 lladdr 33:33:00:00:00:01 PERMANENT
+    dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT
+    dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT
+
+    $ ./neighlist
+    netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
+    netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
+    netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
+
+Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068).
+
+References:
+
+[1] netlink library for go (test-case)
+    https://github.com/vishvananda/netlink
+
+    $ cat ~/go/src/neighlist/main.go
+    package main
+    import ("fmt"; "syscall"; "github.com/vishvananda/netlink")
+    func main() {
+        neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE)
+        for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) }
+    }
+
+    $ export GOPATH=~/go
+    $ go get github.com/vishvananda/netlink
+    $ go build neighlist
+    $ ~/go/src/neighlist/neighlist
+
+Thanks to David Ahern for suggestions to improve this patch.
+
+Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error")
+Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl")
+Reported-by: Aidan Obley <aobley@pivotal.io>
+Signed-off-by: Mauricio Faria de Oliveira <mfo@canonical.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |   29 ++++++++++++++++++++---------
+ 1 file changed, 20 insertions(+), 9 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3694,16 +3694,27 @@ static int rtnl_fdb_dump(struct sk_buff
+       int err = 0;
+       int fidx = 0;
+-      err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+-                        IFLA_MAX, ifla_policy, NULL);
+-      if (err < 0) {
+-              return -EINVAL;
+-      } else if (err == 0) {
+-              if (tb[IFLA_MASTER])
+-                      br_idx = nla_get_u32(tb[IFLA_MASTER]);
+-      }
++      /* A hack to preserve kernel<->userspace interface.
++       * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0.
++       * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails.
++       * So, check for ndmsg with an optional u32 attribute (not used here).
++       * Fortunately these sizes don't conflict with the size of ifinfomsg
++       * with an optional attribute.
++       */
++      if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) &&
++          (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) +
++           nla_attr_size(sizeof(u32)))) {
++              err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
++                                IFLA_MAX, ifla_policy, NULL);
++              if (err < 0) {
++                      return -EINVAL;
++              } else if (err == 0) {
++                      if (tb[IFLA_MASTER])
++                              br_idx = nla_get_u32(tb[IFLA_MASTER]);
++              }
+-      brport_idx = ifm->ifi_index;
++              brport_idx = ifm->ifi_index;
++      }
+       if (br_idx) {
+               br_dev = __dev_get_by_index(net, br_idx);
diff --git a/queue-4.18/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch b/queue-4.18/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch
new file mode 100644
index 0000000..a506ef7
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 2 Oct 2018 15:47:35 -0700
+Subject: rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 0e1d6eca5113858ed2caea61a5adc03c595f6096 ]
+
+We have an impressive number of syzkaller bugs that are linked
+to the fact that syzbot was able to create a networking device
+with millions of TX (or RX) queues.
+
+Let's limit the number of RX/TX queues to 4096; this really should
+cover all known cases.
+
+A separate patch will add various cond_resched() in the loops
+handling sysfs entries at device creation and dismantle.
+
+Tested:
+
+lpaa6:~# ip link add gre-4097 numtxqueues 4097 numrxqueues 4097 type ip6gretap
+RTNETLINK answers: Invalid argument
+
+lpaa6:~# time ip link add gre-4096 numtxqueues 4096 numrxqueues 4096 type ip6gretap
+
+real   0m0.180s
+user   0m0.000s
+sys    0m0.107s
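+
+For reference, a minimal userspace sketch of the same bound check
+(check_queue_counts() is a made-up stand-in for the code added to
+rtnl_create_link() below):
+
+    #include <stdio.h>
+    #include <errno.h>
+
+    /* mirrors the 1..4096 window enforced by the patch */
+    static int check_queue_counts(unsigned int num_tx, unsigned int num_rx)
+    {
+            if (num_tx < 1 || num_tx > 4096)
+                    return -EINVAL;
+            if (num_rx < 1 || num_rx > 4096)
+                    return -EINVAL;
+            return 0;
+    }
+
+    int main(void)
+    {
+            printf("4096/4096 -> %d\n", check_queue_counts(4096, 4096)); /* 0       */
+            printf("4097/4096 -> %d\n", check_queue_counts(4097, 4096)); /* -EINVAL */
+            return 0;
+    }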
+
+Fixes: 76ff5cc91935 ("rtnl: allow to specify number of rx and tx queues on device creation")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -2787,6 +2787,12 @@ struct net_device *rtnl_create_link(stru
+       else if (ops->get_num_rx_queues)
+               num_rx_queues = ops->get_num_rx_queues();
++      if (num_tx_queues < 1 || num_tx_queues > 4096)
++              return ERR_PTR(-EINVAL);
++
++      if (num_rx_queues < 1 || num_rx_queues > 4096)
++              return ERR_PTR(-EINVAL);
++
+       dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
+                              ops->setup, num_tx_queues, num_rx_queues);
+       if (!dev)
diff --git a/queue-4.18/sctp-update-dst-pmtu-with-the-correct-daddr.patch b/queue-4.18/sctp-update-dst-pmtu-with-the-correct-daddr.patch
new file mode 100644
index 0000000..773c87d
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 20 Sep 2018 17:27:28 +0800
+Subject: sctp: update dst pmtu with the correct daddr
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit d7ab5cdce54da631f0c8c11e506c974536a3581e ]
+
+When processing a pmtu update from an icmp packet, sctp_transport_update_pmtu()
+calls .update_pmtu with sk instead of skb.
+
+However, for sctp the daddr in the transport might be different from
+inet_sock->inet_daddr or sk->sk_v6_daddr, which is what is used to update
+or create the route cache. The incorrect daddr will cause a different
+route cache entry to be created for the path.
+
+So before calling .update_pmtu, inet_sock->inet_daddr/sk->sk_v6_daddr
+should be updated with the daddr in the transport, and restored once the
+update is done.
+
+The issue has existed since route exceptions introduction.
+
+Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions.")
+Reported-by: ian.periam@dialogic.com
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/transport.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/transport.c
++++ b/net/sctp/transport.c
+@@ -260,6 +260,7 @@ void sctp_transport_pmtu(struct sctp_tra
+ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+ {
+       struct dst_entry *dst = sctp_transport_dst_check(t);
++      struct sock *sk = t->asoc->base.sk;
+       bool change = true;
+       if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
+@@ -271,12 +272,19 @@ bool sctp_transport_update_pmtu(struct s
+       pmtu = SCTP_TRUNC4(pmtu);
+       if (dst) {
+-              dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
++              struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family);
++              union sctp_addr addr;
++
++              pf->af->from_sk(&addr, sk);
++              pf->to_sk_daddr(&t->ipaddr, sk);
++              dst->ops->update_pmtu(dst, sk, NULL, pmtu);
++              pf->to_sk_daddr(&addr, sk);
++
+               dst = sctp_transport_dst_check(t);
+       }
+       if (!dst) {
+-              t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
++              t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
+               dst = t->dst;
+       }
diff --git a/queue-4.18/series b/queue-4.18/series
new file mode 100644
index 0000000..d306c79
--- /dev/null
@@ -0,0 +1,62 @@
+bnxt_en-fix-tx-timeout-during-netpoll.patch
+bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch
+bonding-avoid-possible-dead-lock.patch
+ip6_tunnel-be-careful-when-accessing-the-inner-header.patch
+ip_tunnel-be-careful-when-accessing-the-inner-header.patch
+ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch
+ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch
+net-dsa-bcm_sf2-call-setup-during-switch-resume.patch
+net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch
+net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch
+net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch
+netlabel-check-for-ipv4mask-in-addrinfo_get.patch
+net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch
+net-mvpp2-fix-a-txq_done-race-condition.patch
+net-sched-add-policy-validation-for-tc-attributes.patch
+net-sched-cls_u32-fix-hnode-refcounting.patch
+net-systemport-fix-wake-up-interrupt-race-during-resume.patch
+net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch
+qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch
+qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch
+rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch
+rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch
+sctp-update-dst-pmtu-with-the-correct-daddr.patch
+team-forbid-enslaving-team-device-to-itself.patch
+tipc-fix-flow-control-accounting-for-implicit-connect.patch
+udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch
+net-qualcomm-rmnet-skip-processing-loopback-packets.patch
+net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-transmit.patch
+net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-receive-path.patch
+tun-remove-unused-parameters.patch
+tun-initialize-napi_mutex-unconditionally.patch
+tun-napi-flags-belong-to-tfile.patch
+net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch
+net-packet-fix-packet-drop-as-of-virtio-gso.patch
+net-dsa-bcm_sf2-fix-unbind-ordering.patch
+net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch
+net-aquantia-memory-corruption-on-jumbo-frames.patch
+net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch
+bonding-pass-link-local-packets-to-bonding-master-also.patch
+bonding-fix-warning-message.patch
+net-stmmac-rework-coalesce-timer-and-fix-multi-queue-races.patch
+nfp-avoid-soft-lockups-under-control-message-storm.patch
+bnxt_en-don-t-try-to-offload-vlan-modify-action.patch
+net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch
+net-phy-phylink-fix-sfp-interface-autodetection.patch
+sfp-fix-oops-with-ethtool-m.patch
+tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch
+inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch
+net-dsa-b53-keep-cpu-port-as-tagged-in-all-vlans.patch
+rtnetlink-fail-dump-if-target-netnsid-is-invalid.patch
+bnxt_en-fix-vnic-reservations-on-the-pf.patch
+net-ipv4-don-t-let-pmtu-updates-increase-route-mtu.patch
+net-mlx5-check-for-sq-and-not-rq-state-when-modifying-hairpin-sq.patch
+bnxt_en-fix-enables-field-in-hwrm_queue_cos2bw_cfg-request.patch
+bnxt_en-get-the-reduced-max_irqs-by-the-ones-used-by-rdma.patch
+net-ipv6-remove-extra-call-to-ip6_convert_metrics-for-multipath-case.patch
+net-ipv6-stop-leaking-percpu-memory-in-fib6-info.patch
+net-mscc-fix-the-frame-extraction-into-the-skb.patch
+qed-fix-shmem-structure-inconsistency-between-driver-and-the-mfw.patch
+r8169-fix-network-stalls-due-to-missing-bit-txcfg_auto_fifo.patch
+r8169-set-rx_multi_en-bit-in-rxconfig-for-8168f-family-chips.patch
+vxlan-fill-ttl-inherit-info.patch
diff --git a/queue-4.18/sfp-fix-oops-with-ethtool-m.patch b/queue-4.18/sfp-fix-oops-with-ethtool-m.patch
new file mode 100644
index 0000000..a6d8c5e
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Tue, 18 Sep 2018 16:48:53 +0100
+Subject: sfp: fix oops with ethtool -m
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit 126d6848ef13958e1cb959e96c21d19bc498ade9 ]
+
+If a network interface is created prior to the SFP socket being
+available, ethtool can request module information.  This unfortunately
+leads to an oops:
+
+Unable to handle kernel NULL pointer dereference at virtual address 00000008
+pgd = (ptrval)
+[00000008] *pgd=7c400831, *pte=00000000, *ppte=00000000
+Internal error: Oops: 17 [#1] SMP ARM
+Modules linked in:
+CPU: 0 PID: 1480 Comm: ethtool Not tainted 4.19.0-rc3 #138
+Hardware name: Broadcom Northstar Plus SoC
+PC is at sfp_get_module_info+0x8/0x10
+LR is at dev_ethtool+0x218c/0x2afc
+
+Fix this by not filling in the network device's SFP bus pointer until
+SFP is fully bound, thereby avoiding the core calling into the SFP bus
+code.
+
+Fixes: ce0aa27ff3f6 ("sfp: add sfp-bus to bridge between network devices and sfp cages")
+Reported-by: Florian Fainelli <f.fainelli@gmail.com>
+Tested-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/sfp-bus.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/phy/sfp-bus.c
++++ b/drivers/net/phy/sfp-bus.c
+@@ -349,6 +349,7 @@ static int sfp_register_bus(struct sfp_b
+       }
+       if (bus->started)
+               bus->socket_ops->start(bus->sfp);
++      bus->netdev->sfp_bus = bus;
+       bus->registered = true;
+       return 0;
+ }
+@@ -357,6 +358,7 @@ static void sfp_unregister_bus(struct sf
+ {
+       const struct sfp_upstream_ops *ops = bus->upstream_ops;
++      bus->netdev->sfp_bus = NULL;
+       if (bus->registered) {
+               if (bus->started)
+                       bus->socket_ops->stop(bus->sfp);
+@@ -438,7 +440,6 @@ static void sfp_upstream_clear(struct sf
+ {
+       bus->upstream_ops = NULL;
+       bus->upstream = NULL;
+-      bus->netdev->sfp_bus = NULL;
+       bus->netdev = NULL;
+ }
+@@ -467,7 +468,6 @@ struct sfp_bus *sfp_register_upstream(st
+               bus->upstream_ops = ops;
+               bus->upstream = upstream;
+               bus->netdev = ndev;
+-              ndev->sfp_bus = bus;
+               if (bus->sfp) {
+                       ret = sfp_register_bus(bus);
diff --git a/queue-4.18/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch b/queue-4.18/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch
new file mode 100644
index 0000000..e4d1917
--- /dev/null
@@ -0,0 +1,85 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 1 Oct 2018 15:02:26 -0700
+Subject: tcp/dccp: fix lockdep issue when SYN is backlogged
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 1ad98e9d1bdf4724c0a8532fabd84bf3c457c2bc ]
+
+In normal SYN processing, packets are handled without listener
+lock and in RCU protected ingress path.
+
+But syzkaller is known to be able to trick us and SYN
+packets might be processed in process context, after being
+queued into socket backlog.
+
+In commit 06f877d613be ("tcp/dccp: fix other lockdep splats
+accessing ireq_opt") I made a very stupid fix that happened
+to work mostly because the regular path is RCU protected.
+
+Really, the thing protecting ireq->ireq_opt is the RCU read lock;
+the pseudo request refcnt is not relevant.
+
+This patch extends what I did in commit 449809a66c1d ("tcp/dccp:
+block BH for SYN processing") by adding an extra rcu_read_{lock|unlock}
+pair in the paths that might be taken when processing a SYN from the
+socket backlog (thus possibly in process context).
+
+Fixes: 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_sock.h |    3 +--
+ net/dccp/input.c        |    4 +++-
+ net/ipv4/tcp_input.c    |    4 +++-
+ 3 files changed, 7 insertions(+), 4 deletions(-)
+
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -132,8 +132,7 @@ static inline int inet_request_bound_dev
+ static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq)
+ {
+-      return rcu_dereference_check(ireq->ireq_opt,
+-                                   refcount_read(&ireq->req.rsk_refcnt) > 0);
++      return rcu_dereference(ireq->ireq_opt);
+ }
+ struct inet_cork {
+--- a/net/dccp/input.c
++++ b/net/dccp/input.c
+@@ -606,11 +606,13 @@ int dccp_rcv_state_process(struct sock *
+       if (sk->sk_state == DCCP_LISTEN) {
+               if (dh->dccph_type == DCCP_PKT_REQUEST) {
+                       /* It is possible that we process SYN packets from backlog,
+-                       * so we need to make sure to disable BH right there.
++                       * so we need to make sure to disable BH and RCU right there.
+                        */
++                      rcu_read_lock();
+                       local_bh_disable();
+                       acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0;
+                       local_bh_enable();
++                      rcu_read_unlock();
+                       if (!acceptable)
+                               return 1;
+                       consume_skb(skb);
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5976,11 +5976,13 @@ int tcp_rcv_state_process(struct sock *s
+                       if (th->fin)
+                               goto discard;
+                       /* It is possible that we process SYN packets from backlog,
+-                       * so we need to make sure to disable BH right there.
++                       * so we need to make sure to disable BH and RCU right there.
+                        */
++                      rcu_read_lock();
+                       local_bh_disable();
+                       acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
+                       local_bh_enable();
++                      rcu_read_unlock();
+                       if (!acceptable)
+                               return 1;
diff --git a/queue-4.18/team-forbid-enslaving-team-device-to-itself.patch b/queue-4.18/team-forbid-enslaving-team-device-to-itself.patch
new file mode 100644
index 0000000..ba645a5
--- /dev/null
@@ -0,0 +1,125 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Mon, 1 Oct 2018 12:21:59 +0300
+Subject: team: Forbid enslaving team device to itself
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit 471b83bd8bbe4e89743683ef8ecb78f7029d8288 ]
+
+team's ndo_add_slave() acquires 'team->lock' and later tries to open the
+newly enslaved device via dev_open(). This emits a 'NETDEV_UP' event
+that causes the VLAN driver to add VLAN 0 on the team device. team's
+ndo_vlan_rx_add_vid() will also try to acquire 'team->lock' and
+deadlock.
+
+Fix this by checking early in the enslavement function that a team
+device is not being enslaved to itself.
+
+A similar check was added to the bond driver in commit 09a89c219baf
+("bonding: disallow enslaving a bond to itself").
+
+WARNING: possible recursive locking detected
+4.18.0-rc7+ #176 Not tainted
+--------------------------------------------
+syz-executor4/6391 is trying to acquire lock:
+(____ptrval____) (&team->lock){+.+.}, at: team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868
+
+but task is already holding lock:
+(____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947
+
+other info that might help us debug this:
+ Possible unsafe locking scenario:
+
+       CPU0
+       ----
+  lock(&team->lock);
+  lock(&team->lock);
+
+ *** DEADLOCK ***
+
+ May be due to missing lock nesting notation
+
+2 locks held by syz-executor4/6391:
+ #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline]
+ #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4662
+ #1: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947
+
+stack backtrace:
+CPU: 1 PID: 6391 Comm: syz-executor4 Not tainted 4.18.0-rc7+ #176
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
+ print_deadlock_bug kernel/locking/lockdep.c:1765 [inline]
+ check_deadlock kernel/locking/lockdep.c:1809 [inline]
+ validate_chain kernel/locking/lockdep.c:2405 [inline]
+ __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435
+ lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924
+ __mutex_lock_common kernel/locking/mutex.c:757 [inline]
+ __mutex_lock+0x176/0x1820 kernel/locking/mutex.c:894
+ mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:909
+ team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868
+ vlan_add_rx_filter_info+0x14a/0x1d0 net/8021q/vlan_core.c:210
+ __vlan_vid_add net/8021q/vlan_core.c:278 [inline]
+ vlan_vid_add+0x63e/0x9d0 net/8021q/vlan_core.c:308
+ vlan_device_event.cold.12+0x2a/0x2f net/8021q/vlan.c:381
+ notifier_call_chain+0x180/0x390 kernel/notifier.c:93
+ __raw_notifier_call_chain kernel/notifier.c:394 [inline]
+ raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
+ call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735
+ call_netdevice_notifiers net/core/dev.c:1753 [inline]
+ dev_open+0x173/0x1b0 net/core/dev.c:1433
+ team_port_add drivers/net/team/team.c:1219 [inline]
+ team_add_slave+0xa8b/0x1c30 drivers/net/team/team.c:1948
+ do_set_master+0x1c9/0x220 net/core/rtnetlink.c:2248
+ do_setlink+0xba4/0x3e10 net/core/rtnetlink.c:2382
+ rtnl_setlink+0x2a9/0x400 net/core/rtnetlink.c:2636
+ rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4665
+ netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2455
+ rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4683
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0xa18/0xfd0 net/netlink/af_netlink.c:1908
+ sock_sendmsg_nosec net/socket.c:642 [inline]
+ sock_sendmsg+0xd5/0x120 net/socket.c:652
+ ___sys_sendmsg+0x7fd/0x930 net/socket.c:2126
+ __sys_sendmsg+0x11d/0x290 net/socket.c:2164
+ __do_sys_sendmsg net/socket.c:2173 [inline]
+ __se_sys_sendmsg net/socket.c:2171 [inline]
+ __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2171
+ do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x456b29
+Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007f9706bf8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00007f9706bf96d4 RCX: 0000000000456b29
+RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000004
+RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
+R13: 00000000004d3548 R14: 00000000004c8227 R15: 0000000000000000
+
+Fixes: 87002b03baab ("net: introduce vlan_vid_[add/del] and use them instead of direct [add/kill]_vid ndo calls")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-and-tested-by: syzbot+bd051aba086537515cdb@syzkaller.appspotmail.com
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/team/team.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/team/team.c
++++ b/drivers/net/team/team.c
+@@ -1172,6 +1172,12 @@ static int team_port_add(struct team *te
+               return -EBUSY;
+       }
++      if (dev == port_dev) {
++              NL_SET_ERR_MSG(extack, "Cannot enslave team device to itself");
++              netdev_err(dev, "Cannot enslave team device to itself\n");
++              return -EINVAL;
++      }
++
+       if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
+           vlan_uses_dev(dev)) {
+               NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
diff --git a/queue-4.18/tipc-fix-flow-control-accounting-for-implicit-connect.patch b/queue-4.18/tipc-fix-flow-control-accounting-for-implicit-connect.patch
new file mode 100644
index 0000000..742f968
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+Date: Tue, 25 Sep 2018 18:21:58 +0200
+Subject: tipc: fix flow control accounting for implicit connect
+
+From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+
+[ Upstream commit 92ef12b32feab8f277b69e9fb89ede2796777f4d ]
+
+In the case of an implicit connect message with data > 1K, the flow
+control accounting is incorrect. At this point, the socket does not
+know the peer node's capabilities and falls back to legacy flow control
+by returning 1; however, the receiver of this message will perform the
+new block accounting. This leads to slack and eventually to traffic
+disturbance.
+
+In this commit, we call tipc_node_get_capabilities() at implicit
+connect and perform accounting based on the peer's capabilities.
+
+Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/socket.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -1422,8 +1422,10 @@ static int __tipc_sendstream(struct sock
+       /* Handle implicit connection setup */
+       if (unlikely(dest)) {
+               rc = __tipc_sendmsg(sock, m, dlen);
+-              if (dlen && (dlen == rc))
++              if (dlen && dlen == rc) {
++                      tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
+                       tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
++              }
+               return rc;
+       }
diff --git a/queue-4.18/tun-initialize-napi_mutex-unconditionally.patch b/queue-4.18/tun-initialize-napi_mutex-unconditionally.patch
new file mode 100644
index 0000000..16eed27
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 28 Sep 2018 14:51:48 -0700
+Subject: tun: initialize napi_mutex unconditionally
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c7256f579f8302ce2c038181c30060d0b40017b2 ]
+
+This is the first part of a fix for the following syzbot report:
+
+console output: https://syzkaller.appspot.com/x/log.txt?x=145378e6400000
+kernel config:  https://syzkaller.appspot.com/x/.config?x=443816db871edd66
+dashboard link: https://syzkaller.appspot.com/bug?extid=e662df0ac1d753b57e80
+
+The following patch fixes the race condition, but it seems safer
+to initialize this mutex at tfile creation anyway.
+
+Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot+e662df0ac1d753b57e80@syzkaller.appspotmail.com
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -319,7 +319,6 @@ static void tun_napi_init(struct tun_str
+               netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
+                              NAPI_POLL_WEIGHT);
+               napi_enable(&tfile->napi);
+-              mutex_init(&tfile->napi_mutex);
+       }
+ }
+@@ -3241,6 +3240,7 @@ static int tun_chr_open(struct inode *in
+               return -ENOMEM;
+       }
++      mutex_init(&tfile->napi_mutex);
+       RCU_INIT_POINTER(tfile->tun, NULL);
+       tfile->flags = 0;
+       tfile->ifindex = 0;
diff --git a/queue-4.18/tun-napi-flags-belong-to-tfile.patch b/queue-4.18/tun-napi-flags-belong-to-tfile.patch
new file mode 100644
index 0000000..5868c02
--- /dev/null
@@ -0,0 +1,189 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 28 Sep 2018 14:51:49 -0700
+Subject: tun: napi flags belong to tfile
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit af3fb24eecb2c59246e03c99386037fd5ad84ffd ]
+
+Since tun->flags might be shared by multiple tfile structures,
+it is better to make sure tun_get_user() is using the flags
+for the current tfile.
+
+Presence of the READ_ONCE() in tun_napi_frags_enabled() gave a hint
+of what could happen, but we need something stronger to please
+syzbot.
+
+kasan: CONFIG_KASAN_INLINE enabled
+kasan: GPF could be caused by NULL-ptr deref or user memory access
+general protection fault: 0000 [#1] PREEMPT SMP KASAN
+CPU: 0 PID: 13647 Comm: syz-executor5 Not tainted 4.19.0-rc5+ #59
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:dev_gro_receive+0x132/0x2720 net/core/dev.c:5427
+Code: 48 c1 ea 03 80 3c 02 00 0f 85 6e 20 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b 6e 10 49 8d bd d0 00 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 59 20 00 00 4d 8b a5 d0 00 00 00 31 ff 41 81 e4
+RSP: 0018:ffff8801c400f410 EFLAGS: 00010202
+RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8618d325
+RDX: 000000000000001a RSI: ffffffff86189f97 RDI: 00000000000000d0
+RBP: ffff8801c400f608 R08: ffff8801c8fb4300 R09: 0000000000000000
+R10: ffffed0038801ed7 R11: 0000000000000003 R12: ffff8801d327d358
+R13: 0000000000000000 R14: ffff8801c16dd8c0 R15: 0000000000000004
+FS:  00007fe003615700(0000) GS:ffff8801dac00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fe1f3c43db8 CR3: 00000001bebb2000 CR4: 00000000001406f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ napi_gro_frags+0x3f4/0xc90 net/core/dev.c:5715
+ tun_get_user+0x31d5/0x42a0 drivers/net/tun.c:1922
+ tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1967
+ call_write_iter include/linux/fs.h:1808 [inline]
+ new_sync_write fs/read_write.c:474 [inline]
+ __vfs_write+0x6b8/0x9f0 fs/read_write.c:487
+ vfs_write+0x1fc/0x560 fs/read_write.c:549
+ ksys_write+0x101/0x260 fs/read_write.c:598
+ __do_sys_write fs/read_write.c:610 [inline]
+ __se_sys_write fs/read_write.c:607 [inline]
+ __x64_sys_write+0x73/0xb0 fs/read_write.c:607
+ do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x457579
+Code: 1d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007fe003614c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457579
+RDX: 0000000000000012 RSI: 0000000020000000 RDI: 000000000000000a
+RBP: 000000000072c040 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe0036156d4
+R13: 00000000004c5574 R14: 00000000004d8e98 R15: 00000000ffffffff
+Modules linked in:
+
+RIP: 0010:dev_gro_receive+0x132/0x2720 net/core/dev.c:5427
+Code: 48 c1 ea 03 80 3c 02 00 0f 85 6e 20 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b 6e 10 49 8d bd d0 00 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 59 20 00 00 4d 8b a5 d0 00 00 00 31 ff 41 81 e4
+RSP: 0018:ffff8801c400f410 EFLAGS: 00010202
+RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8618d325
+RDX: 000000000000001a RSI: ffffffff86189f97 RDI: 00000000000000d0
+RBP: ffff8801c400f608 R08: ffff8801c8fb4300 R09: 0000000000000000
+R10: ffffed0038801ed7 R11: 0000000000000003 R12: ffff8801d327d358
+R13: 0000000000000000 R14: ffff8801c16dd8c0 R15: 0000000000000004
+FS:  00007fe003615700(0000) GS:ffff8801dac00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fe1f3c43db8 CR3: 00000001bebb2000 CR4: 00000000001406f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
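+
+A tiny standalone sketch of the per-queue flag derivation introduced below
+(the EXAMPLE_IFF_* values are hard-coded for illustration and only assumed to
+match the real tun flags):
+
+    #include <stdio.h>
+    #include <stdbool.h>
+
+    #define EXAMPLE_IFF_NAPI        0x0010   /* assumed value, illustration only */
+    #define EXAMPLE_IFF_NAPI_FRAGS  0x0020   /* assumed value, illustration only */
+
+    struct example_tfile {
+            bool napi_enabled;
+            bool napi_frags_enabled;
+    };
+
+    /* decide both flags once per queue, from the flags used at attach time */
+    static void example_napi_init(struct example_tfile *tf, unsigned int flags)
+    {
+            bool napi_en = flags & EXAMPLE_IFF_NAPI;
+
+            tf->napi_enabled = napi_en;
+            tf->napi_frags_enabled = napi_en && (flags & EXAMPLE_IFF_NAPI_FRAGS);
+    }
+
+    int main(void)
+    {
+            struct example_tfile tf;
+
+            example_napi_init(&tf, EXAMPLE_IFF_NAPI_FRAGS);    /* frags without napi */
+            printf("%d %d\n", tf.napi_enabled, tf.napi_frags_enabled);   /* 0 0 */
+
+            example_napi_init(&tf, EXAMPLE_IFF_NAPI | EXAMPLE_IFF_NAPI_FRAGS);
+            printf("%d %d\n", tf.napi_enabled, tf.napi_frags_enabled);   /* 1 1 */
+            return 0;
+    }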
+
+Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |   29 ++++++++++++++++-------------
+ 1 file changed, 16 insertions(+), 13 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -181,6 +181,7 @@ struct tun_file {
+       };
+       struct napi_struct napi;
+       bool napi_enabled;
++      bool napi_frags_enabled;
+       struct mutex napi_mutex;        /* Protects access to the above napi */
+       struct list_head next;
+       struct tun_struct *detached;
+@@ -312,9 +313,10 @@ static int tun_napi_poll(struct napi_str
+ }
+ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
+-                        bool napi_en)
++                        bool napi_en, bool napi_frags)
+ {
+       tfile->napi_enabled = napi_en;
++      tfile->napi_frags_enabled = napi_en && napi_frags;
+       if (napi_en) {
+               netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
+                              NAPI_POLL_WEIGHT);
+@@ -334,9 +336,9 @@ static void tun_napi_del(struct tun_file
+               netif_napi_del(&tfile->napi);
+ }
+-static bool tun_napi_frags_enabled(const struct tun_struct *tun)
++static bool tun_napi_frags_enabled(const struct tun_file *tfile)
+ {
+-      return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS;
++      return tfile->napi_frags_enabled;
+ }
+ #ifdef CONFIG_TUN_VNET_CROSS_LE
+@@ -790,7 +792,7 @@ static void tun_detach_all(struct net_de
+ }
+ static int tun_attach(struct tun_struct *tun, struct file *file,
+-                    bool skip_filter, bool napi)
++                    bool skip_filter, bool napi, bool napi_frags)
+ {
+       struct tun_file *tfile = file->private_data;
+       struct net_device *dev = tun->dev;
+@@ -863,7 +865,7 @@ static int tun_attach(struct tun_struct
+               tun_enable_queue(tfile);
+       } else {
+               sock_hold(&tfile->sk);
+-              tun_napi_init(tun, tfile, napi);
++              tun_napi_init(tun, tfile, napi, napi_frags);
+       }
+       tun_set_real_num_queues(tun);
+@@ -1173,13 +1175,11 @@ static void tun_poll_controller(struct n
+               struct tun_file *tfile;
+               int i;
+-              if (tun_napi_frags_enabled(tun))
+-                      return;
+-
+               rcu_read_lock();
+               for (i = 0; i < tun->numqueues; i++) {
+                       tfile = rcu_dereference(tun->tfiles[i]);
+-                      if (tfile->napi_enabled)
++                      if (!tun_napi_frags_enabled(tfile) &&
++                          tfile->napi_enabled)
+                               napi_schedule(&tfile->napi);
+               }
+               rcu_read_unlock();
+@@ -1750,7 +1750,7 @@ static ssize_t tun_get_user(struct tun_s
+       int err;
+       u32 rxhash = 0;
+       int skb_xdp = 1;
+-      bool frags = tun_napi_frags_enabled(tun);
++      bool frags = tun_napi_frags_enabled(tfile);
+       if (!(tun->dev->flags & IFF_UP))
+               return -EIO;
+@@ -2575,7 +2575,8 @@ static int tun_set_iff(struct net *net,
+                       return err;
+               err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
+-                               ifr->ifr_flags & IFF_NAPI);
++                               ifr->ifr_flags & IFF_NAPI,
++                               ifr->ifr_flags & IFF_NAPI_FRAGS);
+               if (err < 0)
+                       return err;
+@@ -2673,7 +2674,8 @@ static int tun_set_iff(struct net *net,
+                             (ifr->ifr_flags & TUN_FEATURES);
+               INIT_LIST_HEAD(&tun->disabled);
+-              err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI);
++              err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI,
++                               ifr->ifr_flags & IFF_NAPI_FRAGS);
+               if (err < 0)
+                       goto err_free_flow;
+@@ -2822,7 +2824,8 @@ static int tun_set_queue(struct file *fi
+               ret = security_tun_dev_attach_queue(tun->security);
+               if (ret < 0)
+                       goto unlock;
+-              ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI);
++              ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI,
++                               tun->flags & IFF_NAPI_FRAGS);
+       } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
+               tun = rtnl_dereference(tfile->tun);
+               if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
diff --git a/queue-4.18/tun-remove-unused-parameters.patch b/queue-4.18/tun-remove-unused-parameters.patch
new file mode 100644
index 0000000..cce0df4
--- /dev/null
@@ -0,0 +1,66 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 28 Sep 2018 14:51:47 -0700
+Subject: tun: remove unused parameters
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 06e55addd3f40b5294e448c2cb7605ca4f28c2e3 ]
+
+tun_napi_disable() and tun_napi_del() do not need
+a pointer to the tun_struct.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -323,13 +323,13 @@ static void tun_napi_init(struct tun_str
+       }
+ }
+-static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile)
++static void tun_napi_disable(struct tun_file *tfile)
+ {
+       if (tfile->napi_enabled)
+               napi_disable(&tfile->napi);
+ }
+-static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile)
++static void tun_napi_del(struct tun_file *tfile)
+ {
+       if (tfile->napi_enabled)
+               netif_napi_del(&tfile->napi);
+@@ -688,8 +688,8 @@ static void __tun_detach(struct tun_file
+       tun = rtnl_dereference(tfile->tun);
+       if (tun && clean) {
+-              tun_napi_disable(tun, tfile);
+-              tun_napi_del(tun, tfile);
++              tun_napi_disable(tfile);
++              tun_napi_del(tfile);
+       }
+       if (tun && !tfile->detached) {
+@@ -756,7 +756,7 @@ static void tun_detach_all(struct net_de
+       for (i = 0; i < n; i++) {
+               tfile = rtnl_dereference(tun->tfiles[i]);
+               BUG_ON(!tfile);
+-              tun_napi_disable(tun, tfile);
++              tun_napi_disable(tfile);
+               tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
+               tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+               RCU_INIT_POINTER(tfile->tun, NULL);
+@@ -772,7 +772,7 @@ static void tun_detach_all(struct net_de
+       synchronize_net();
+       for (i = 0; i < n; i++) {
+               tfile = rtnl_dereference(tun->tfiles[i]);
+-              tun_napi_del(tun, tfile);
++              tun_napi_del(tfile);
+               /* Drop read queue */
+               tun_queue_purge(tfile);
+               xdp_rxq_info_unreg(&tfile->xdp_rxq);
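For context: the tun_struct argument was redundant because both helpers only touch the NAPI context embedded in the per-queue file structure. A minimal sketch of that idea (assumed field and function names, not the actual driver code):

#include <linux/netdevice.h>

/* hypothetical stand-in for the relevant tun_file fields */
struct tfile_example {
	struct napi_struct napi;
	bool napi_enabled;
};

static void example_napi_disable(struct tfile_example *tfile)
{
	if (tfile->napi_enabled)
		napi_disable(&tfile->napi);	/* only the napi_struct is needed */
}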
diff --git a/queue-4.18/udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch b/queue-4.18/udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch
new file mode 100644 (file)
index 0000000..e3c3be3
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Thu, 4 Oct 2018 13:37:32 +0200
+Subject: udp: Unbreak modules that rely on external __skb_recv_udp() availability
+
+From: Jiri Kosina <jkosina@suse.cz>
+
+[ Upstream commit 7e823644b60555f70f241274b8d0120dd919269a ]
+
+Commit 2276f58ac589 ("udp: use a separate rx queue for packet reception")
+turned static inline __skb_recv_udp() from being a trivial helper around
+__skb_recv_datagram() into a UDP-specific implementation, making it
+EXPORT_SYMBOL_GPL() at the same time.
+
+There are external modules that got broken by __skb_recv_udp() not being
+visible to them. Let's unbreak them by making __skb_recv_udp EXPORT_SYMBOL().
+
+Rationale (one of several) for why this is actually the "technically
+correct" thing to do: __skb_recv_udp() used to be an inline wrapper around
+__skb_recv_datagram(), which itself (still, and correctly so, I believe)
+is EXPORT_SYMBOL().
+
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception")
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1631,7 +1631,7 @@ busy_check:
+       *err = error;
+       return NULL;
+ }
+-EXPORT_SYMBOL_GPL(__skb_recv_udp);
++EXPORT_SYMBOL(__skb_recv_udp);
+ /*
+  *    This should be easy, if there is something there we
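For context: the practical difference between the two macros is that EXPORT_SYMBOL_GPL() symbols can only be resolved by modules whose MODULE_LICENSE() is GPL-compatible, while EXPORT_SYMBOL() symbols are visible to every module. A minimal sketch (not from this patch), using a hypothetical example_helper() rather than __skb_recv_udp() itself:

#include <linux/module.h>

/* hypothetical helper built into the kernel image */
int example_helper(int x)
{
	return x * 2;
}
EXPORT_SYMBOL(example_helper);	/* resolvable by any module */
/*
 * EXPORT_SYMBOL_GPL(example_helper) would instead restrict the symbol to
 * modules with a GPL-compatible MODULE_LICENSE(), which is what left
 * out-of-tree users of __skb_recv_udp() unable to load.
 */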
diff --git a/queue-4.18/vxlan-fill-ttl-inherit-info.patch b/queue-4.18/vxlan-fill-ttl-inherit-info.patch
new file mode 100644 (file)
index 0000000..c3c6bb9
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Tue Oct 16 07:06:17 CEST 2018
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Wed, 26 Sep 2018 10:35:42 +0800
+Subject: vxlan: fill ttl inherit info
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 8fd780698745ba121530c5c20fd237aacde4c371 ]
+
+When adding vxlan ttl inherit support, I forgot to fill it when dumping
+vxlan info. Fix it now.
+
+Fixes: 72f6d71e491e6 ("vxlan: add ttl inherit support")
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -3537,6 +3537,7 @@ static size_t vxlan_get_size(const struc
+               nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */
+               nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL */
++              nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL_INHERIT */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TOS */
+               nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_LEARNING */
+@@ -3601,6 +3602,8 @@ static int vxlan_fill_info(struct sk_buf
+       }
+       if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
++          nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
++                     !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
+           nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
+           nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
+           nla_put_u8(skb, IFLA_VXLAN_LEARNING,