From: Greg Kroah-Hartman Date: Tue, 16 Oct 2018 05:08:16 +0000 (+0200) Subject: 4.18-stable patches X-Git-Tag: v4.9.134~28 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8795579f39fb1d9fb9f04af92a59980e6406d476;p=thirdparty%2Fkernel%2Fstable-queue.git 4.18-stable patches added patches: bnxt_en-don-t-try-to-offload-vlan-modify-action.patch bnxt_en-fix-enables-field-in-hwrm_queue_cos2bw_cfg-request.patch bnxt_en-fix-tx-timeout-during-netpoll.patch bnxt_en-fix-vnic-reservations-on-the-pf.patch bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch bnxt_en-get-the-reduced-max_irqs-by-the-ones-used-by-rdma.patch bonding-avoid-possible-dead-lock.patch bonding-fix-warning-message.patch bonding-pass-link-local-packets-to-bonding-master-also.patch inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch ip6_tunnel-be-careful-when-accessing-the-inner-header.patch ip_tunnel-be-careful-when-accessing-the-inner-header.patch ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch net-aquantia-memory-corruption-on-jumbo-frames.patch net-dsa-b53-keep-cpu-port-as-tagged-in-all-vlans.patch net-dsa-bcm_sf2-call-setup-during-switch-resume.patch net-dsa-bcm_sf2-fix-unbind-ordering.patch net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch net-ipv4-don-t-let-pmtu-updates-increase-route-mtu.patch net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch net-ipv6-remove-extra-call-to-ip6_convert_metrics-for-multipath-case.patch net-ipv6-stop-leaking-percpu-memory-in-fib6-info.patch net-mlx5-check-for-sq-and-not-rq-state-when-modifying-hairpin-sq.patch net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch net-mscc-fix-the-frame-extraction-into-the-skb.patch 
net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch net-mvpp2-fix-a-txq_done-race-condition.patch net-packet-fix-packet-drop-as-of-virtio-gso.patch net-phy-phylink-fix-sfp-interface-autodetection.patch net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-receive-path.patch net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-transmit.patch net-qualcomm-rmnet-skip-processing-loopback-packets.patch net-sched-add-policy-validation-for-tc-attributes.patch net-sched-cls_u32-fix-hnode-refcounting.patch net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch net-stmmac-rework-coalesce-timer-and-fix-multi-queue-races.patch net-systemport-fix-wake-up-interrupt-race-during-resume.patch net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch netlabel-check-for-ipv4mask-in-addrinfo_get.patch nfp-avoid-soft-lockups-under-control-message-storm.patch qed-fix-shmem-structure-inconsistency-between-driver-and-the-mfw.patch qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch r8169-fix-network-stalls-due-to-missing-bit-txcfg_auto_fifo.patch r8169-set-rx_multi_en-bit-in-rxconfig-for-8168f-family-chips.patch rtnetlink-fail-dump-if-target-netnsid-is-invalid.patch rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch sctp-update-dst-pmtu-with-the-correct-daddr.patch sfp-fix-oops-with-ethtool-m.patch tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch team-forbid-enslaving-team-device-to-itself.patch tipc-fix-flow-control-accounting-for-implicit-connect.patch tun-initialize-napi_mutex-unconditionally.patch tun-napi-flags-belong-to-tfile.patch tun-remove-unused-parameters.patch udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch vxlan-fill-ttl-inherit-info.patch --- diff --git a/queue-4.18/bnxt_en-don-t-try-to-offload-vlan-modify-action.patch 
b/queue-4.18/bnxt_en-don-t-try-to-offload-vlan-modify-action.patch new file mode 100644 index 00000000000..74a38075830 --- /dev/null +++ b/queue-4.18/bnxt_en-don-t-try-to-offload-vlan-modify-action.patch @@ -0,0 +1,63 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Davide Caratti +Date: Wed, 19 Sep 2018 19:01:37 +0200 +Subject: bnxt_en: don't try to offload VLAN 'modify' action + +From: Davide Caratti + +[ Upstream commit 8c6ec3613e7b0aade20a3196169c0bab32ed3e3f ] + +bnxt offload code currently supports only 'push' and 'pop' operation: let +.ndo_setup_tc() return -EOPNOTSUPP if VLAN 'modify' action is configured. + +Fixes: 2ae7408fedfe ("bnxt_en: bnxt: add TC flower filter offload support") +Signed-off-by: Davide Caratti +Acked-by: Sathya Perla +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +@@ -75,17 +75,23 @@ static int bnxt_tc_parse_redir(struct bn + return 0; + } + +-static void bnxt_tc_parse_vlan(struct bnxt *bp, +- struct bnxt_tc_actions *actions, +- const struct tc_action *tc_act) ++static int bnxt_tc_parse_vlan(struct bnxt *bp, ++ struct bnxt_tc_actions *actions, ++ const struct tc_action *tc_act) + { +- if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_POP) { ++ switch (tcf_vlan_action(tc_act)) { ++ case TCA_VLAN_ACT_POP: + actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN; +- } else if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_PUSH) { ++ break; ++ case TCA_VLAN_ACT_PUSH: + actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN; + actions->push_vlan_tci = htons(tcf_vlan_push_vid(tc_act)); + actions->push_vlan_tpid = tcf_vlan_push_proto(tc_act); ++ break; ++ default: ++ return -EOPNOTSUPP; + } ++ return 0; + } + + static int bnxt_tc_parse_tunnel_set(struct bnxt *bp, +@@ -136,7 +142,9 @@ static int 
bnxt_tc_parse_actions(struct + + /* Push/pop VLAN */ + if (is_tcf_vlan(tc_act)) { +- bnxt_tc_parse_vlan(bp, actions, tc_act); ++ rc = bnxt_tc_parse_vlan(bp, actions, tc_act); ++ if (rc) ++ return rc; + continue; + } + diff --git a/queue-4.18/bnxt_en-fix-enables-field-in-hwrm_queue_cos2bw_cfg-request.patch b/queue-4.18/bnxt_en-fix-enables-field-in-hwrm_queue_cos2bw_cfg-request.patch new file mode 100644 index 00000000000..7fdc334d89a --- /dev/null +++ b/queue-4.18/bnxt_en-fix-enables-field-in-hwrm_queue_cos2bw_cfg-request.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Vasundhara Volam +Date: Fri, 5 Oct 2018 00:26:01 -0400 +Subject: bnxt_en: Fix enables field in HWRM_QUEUE_COS2BW_CFG request + +From: Vasundhara Volam + +[ Upstream commit 5db0e0969af6501ad45fe0494039d3b9c797822b ] + +In HWRM_QUEUE_COS2BW_CFG request, enables field should have the bits +set only for the queue ids which are having the valid parameters. + +This causes firmware to return error when the TC to hardware CoS queue +mapping is not 1:1 during DCBNL ETS setup. + +Fixes: 2e8ef77ee0ff ("bnxt_en: Add TC to hardware QoS queue mapping logic.") +Signed-off-by: Vasundhara Volam +Signed-off-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +@@ -98,13 +98,13 @@ static int bnxt_hwrm_queue_cos2bw_cfg(st + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1); + for (i = 0; i < max_tc; i++) { +- u8 qidx; ++ u8 qidx = bp->tc_to_qidx[i]; + + req.enables |= cpu_to_le32( +- QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i); ++ QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << ++ qidx); + + memset(&cos2bw, 0, sizeof(cos2bw)); +- qidx = bp->tc_to_qidx[i]; + cos2bw.queue_id = bp->q_info[qidx].queue_id; + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) { + cos2bw.tsa = diff --git a/queue-4.18/bnxt_en-fix-tx-timeout-during-netpoll.patch b/queue-4.18/bnxt_en-fix-tx-timeout-during-netpoll.patch new file mode 100644 index 00000000000..04188ea1853 --- /dev/null +++ b/queue-4.18/bnxt_en-fix-tx-timeout-during-netpoll.patch @@ -0,0 +1,73 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Michael Chan +Date: Wed, 26 Sep 2018 00:41:04 -0400 +Subject: bnxt_en: Fix TX timeout during netpoll. + +From: Michael Chan + +[ Upstream commit 73f21c653f930f438d53eed29b5e4c65c8a0f906 ] + +The current netpoll implementation in the bnxt_en driver has problems +that may miss TX completion events. bnxt_poll_work() in effect is +only handling at most 1 TX packet before exiting. In addition, +there may be in flight TX completions that ->poll() may miss even +after we fix bnxt_poll_work() to handle all visible TX completions. +netpoll may not call ->poll() again and HW may not generate IRQ +because the driver does not ARM the IRQ when the budget (0 for netpoll) +is reached. + +We fix it by handling all TX completions and to always ARM the IRQ +when we exit ->poll() with 0 budget. 
+ +Also, the logic to ACK the completion ring in case it is almost filled +with TX completions need to be adjusted to take care of the 0 budget +case, as discussed with Eric Dumazet + +Reported-by: Song Liu +Reviewed-by: Song Liu +Tested-by: Song Liu +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -1882,8 +1882,11 @@ static int bnxt_poll_work(struct bnxt *b + if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { + tx_pkts++; + /* return full budget so NAPI will complete. */ +- if (unlikely(tx_pkts > bp->tx_wake_thresh)) ++ if (unlikely(tx_pkts > bp->tx_wake_thresh)) { + rx_pkts = budget; ++ raw_cons = NEXT_RAW_CMP(raw_cons); ++ break; ++ } + } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { + if (likely(budget)) + rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); +@@ -1911,7 +1914,7 @@ static int bnxt_poll_work(struct bnxt *b + } + raw_cons = NEXT_RAW_CMP(raw_cons); + +- if (rx_pkts == budget) ++ if (rx_pkts && rx_pkts == budget) + break; + } + +@@ -2025,8 +2028,12 @@ static int bnxt_poll(struct napi_struct + while (1) { + work_done += bnxt_poll_work(bp, bnapi, budget - work_done); + +- if (work_done >= budget) ++ if (work_done >= budget) { ++ if (!budget) ++ BNXT_CP_DB_REARM(cpr->cp_doorbell, ++ cpr->cp_raw_cons); + break; ++ } + + if (!bnxt_has_work(bp, cpr)) { + if (napi_complete_done(napi, work_done)) diff --git a/queue-4.18/bnxt_en-fix-vnic-reservations-on-the-pf.patch b/queue-4.18/bnxt_en-fix-vnic-reservations-on-the-pf.patch new file mode 100644 index 00000000000..d5eaaa03a80 --- /dev/null +++ b/queue-4.18/bnxt_en-fix-vnic-reservations-on-the-pf.patch @@ -0,0 +1,34 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Michael Chan +Date: Fri, 5 Oct 2018 00:26:00 -0400 +Subject: bnxt_en: Fix VNIC 
reservations on the PF. + +From: Michael Chan + +[ Upstream commit dbe80d446c859873820eedfff4abc61c71f1927b ] + +The enables bit for VNIC was set incorrectly when calling the HWRM_FUNC_CFG +firmware call to reserve VNICs. This has the effect that the firmware +will keep a large number of VNICs for the PF, leaving very few for +VFs. A DPDK driver running on the VFs, which requires more VNICs, may not +work properly as a result. + +Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.") +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -4651,7 +4651,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt + FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + enables |= ring_grps ? + FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; +- enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0; ++ enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0; + + req->num_rx_rings = cpu_to_le16(rx_rings); + req->num_hw_ring_grps = cpu_to_le16(ring_grps); diff --git a/queue-4.18/bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch b/queue-4.18/bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch new file mode 100644 index 00000000000..1f1f1fafffd --- /dev/null +++ b/queue-4.18/bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch @@ -0,0 +1,50 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Venkat Duvvuru +Date: Fri, 5 Oct 2018 00:26:02 -0400 +Subject: bnxt_en: free hwrm resources, if driver probe fails. + +From: Venkat Duvvuru + +[ Upstream commit a2bf74f4e1b82395dad2b08d2a911d9151db71c1 ] + +When the driver probe fails, all the resources that were allocated prior +to the failure must be freed. However, hwrm dma response memory is not +getting freed. + +This patch fixes the problem described above. 
+ +Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") +Signed-off-by: Venkat Duvvuru +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -3015,10 +3015,11 @@ static void bnxt_free_hwrm_resources(str + { + struct pci_dev *pdev = bp->pdev; + +- dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, +- bp->hwrm_cmd_resp_dma_addr); +- +- bp->hwrm_cmd_resp_addr = NULL; ++ if (bp->hwrm_cmd_resp_addr) { ++ dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, ++ bp->hwrm_cmd_resp_dma_addr); ++ bp->hwrm_cmd_resp_addr = NULL; ++ } + if (bp->hwrm_dbg_resp_addr) { + dma_free_coherent(&pdev->dev, HWRM_DBG_REG_BUF_SIZE, + bp->hwrm_dbg_resp_addr, +@@ -8931,6 +8932,7 @@ init_err_cleanup_tc: + bnxt_clear_int_mode(bp); + + init_err_pci_clean: ++ bnxt_free_hwrm_resources(bp); + bnxt_cleanup_pci(bp); + + init_err_free: diff --git a/queue-4.18/bnxt_en-get-the-reduced-max_irqs-by-the-ones-used-by-rdma.patch b/queue-4.18/bnxt_en-get-the-reduced-max_irqs-by-the-ones-used-by-rdma.patch new file mode 100644 index 00000000000..d53cf3ce526 --- /dev/null +++ b/queue-4.18/bnxt_en-get-the-reduced-max_irqs-by-the-ones-used-by-rdma.patch @@ -0,0 +1,36 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Vasundhara Volam +Date: Fri, 5 Oct 2018 00:26:03 -0400 +Subject: bnxt_en: get the reduced max_irqs by the ones used by RDMA + +From: Vasundhara Volam + +[ Upstream commit c78fe058879bdea919d44f23e21da26f603e9166 ] + +When getting the max rings supported, get the reduced max_irqs +by the ones used by RDMA. + +If the number of MSIX vectors is the limiting factor, this bug may cause the +max ring count to be higher than it should be when the RDMA driver is +loaded and may result in ring allocation failures. 
+ +Fixes: 30f529473ec9 ("bnxt_en: Do not modify max IRQ count after RDMA driver requests/frees IRQs.") +Signed-off-by: Vasundhara Volam +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -8501,7 +8501,7 @@ static void _bnxt_get_max_rings(struct b + *max_tx = hw_resc->max_tx_rings; + *max_rx = hw_resc->max_rx_rings; + *max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp), +- hw_resc->max_irqs); ++ hw_resc->max_irqs - bnxt_get_ulp_msix_num(bp)); + *max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs); + max_ring_grps = hw_resc->max_hw_ring_grps; + if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) { diff --git a/queue-4.18/bonding-avoid-possible-dead-lock.patch b/queue-4.18/bonding-avoid-possible-dead-lock.patch new file mode 100644 index 00000000000..dfd4c17939f --- /dev/null +++ b/queue-4.18/bonding-avoid-possible-dead-lock.patch @@ -0,0 +1,244 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Mahesh Bandewar +Date: Mon, 24 Sep 2018 14:40:11 -0700 +Subject: bonding: avoid possible dead-lock + +From: Mahesh Bandewar + +[ Upstream commit d4859d749aa7090ffb743d15648adb962a1baeae ] + +Syzkaller reported this on a slightly older kernel but it's still +applicable to the current kernel - + +====================================================== +WARNING: possible circular locking dependency detected +4.18.0-next-20180823+ #46 Not tainted +------------------------------------------------------ +syz-executor4/26841 is trying to acquire lock: +00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652 + +but task is already holding lock: +00000000768ab431 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] +00000000768ab431 (rtnl_mutex){+.+.}, at: 
rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4708 + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #2 (rtnl_mutex){+.+.}: + __mutex_lock_common kernel/locking/mutex.c:925 [inline] + __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073 + mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088 + rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77 + bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline] + bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320 + process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153 + worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 + kthread+0x35a/0x420 kernel/kthread.c:246 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 + +-> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}: + process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129 + worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 + kthread+0x35a/0x420 kernel/kthread.c:246 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 + +-> #0 ((wq_completion)bond_dev->name){+.+.}: + lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 + flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 + drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 + destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 + __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 + bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 + register_netdevice+0x337/0x1100 net/core/dev.c:8410 + bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 + rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 + rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 + netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 + rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 + netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] + netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 + netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 + sock_sendmsg_nosec net/socket.c:622 [inline] + 
sock_sendmsg+0xd5/0x120 net/socket.c:632 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 + __sys_sendmsg+0x11d/0x290 net/socket.c:2153 + __do_sys_sendmsg net/socket.c:2162 [inline] + __se_sys_sendmsg net/socket.c:2160 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +other info that might help us debug this: + +Chain exists of: + (wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(rtnl_mutex); + lock((work_completion)(&(&nnw->work)->work)); + lock(rtnl_mutex); + lock((wq_completion)bond_dev->name); + + *** DEADLOCK *** + +1 lock held by syz-executor4/26841: + +stack backtrace: +CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 + print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222 + check_prev_add kernel/locking/lockdep.c:1862 [inline] + check_prevs_add kernel/locking/lockdep.c:1975 [inline] + validate_chain kernel/locking/lockdep.c:2416 [inline] + __lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412 + lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 + flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 + drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 + destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 + __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 + bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 + register_netdevice+0x337/0x1100 net/core/dev.c:8410 + bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 + rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 + rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 + netlink_rcv_skb+0x172/0x440 
net/netlink/af_netlink.c:2454 + rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 + netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] + netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 + netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 + sock_sendmsg_nosec net/socket.c:622 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:632 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 + __sys_sendmsg+0x11d/0x290 net/socket.c:2153 + __do_sys_sendmsg net/socket.c:2162 [inline] + __se_sys_sendmsg net/socket.c:2160 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x457089 +Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089 +RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 +RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001 + +Signed-off-by: Mahesh Bandewar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 43 +++++++++++++++------------------------- + include/net/bonding.h | 7 ------ + 2 files changed, 18 insertions(+), 32 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -210,6 +210,7 @@ static void bond_get_stats(struct net_de + static void bond_slave_arr_handler(struct work_struct *work); + static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, + int mod); ++static void bond_netdev_notify_work(struct work_struct *work); + + /*---------------------------- General routines -----------------------------*/ + +@@ -1276,6 +1277,8 @@ static struct slave *bond_alloc_slave(st + return NULL; + } + } ++ INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); ++ + return slave; + } + +@@ -1283,6 +1286,7 @@ static void bond_free_slave(struct slave + { + struct bonding *bond = bond_get_bond_by_slave(slave); + ++ cancel_delayed_work_sync(&slave->notify_work); + if (BOND_MODE(bond) == BOND_MODE_8023AD) + kfree(SLAVE_AD_INFO(slave)); + +@@ -1304,39 +1308,26 @@ static void bond_fill_ifslave(struct sla + info->link_failure_count = slave->link_failure_count; + } + +-static void bond_netdev_notify(struct net_device *dev, +- struct netdev_bonding_info *info) +-{ +- rtnl_lock(); +- netdev_bonding_info_change(dev, info); +- rtnl_unlock(); +-} +- + static void bond_netdev_notify_work(struct work_struct *_work) + { +- struct netdev_notify_work *w = +- container_of(_work, struct netdev_notify_work, work.work); ++ struct slave *slave = container_of(_work, struct slave, ++ notify_work.work); ++ ++ if (rtnl_trylock()) { ++ struct netdev_bonding_info binfo; + +- bond_netdev_notify(w->dev, &w->bonding_info); +- dev_put(w->dev); +- kfree(w); ++ bond_fill_ifslave(slave, &binfo.slave); ++ bond_fill_ifbond(slave->bond, &binfo.master); ++ netdev_bonding_info_change(slave->dev, &binfo); ++ rtnl_unlock(); ++ } else { ++ 
queue_delayed_work(slave->bond->wq, &slave->notify_work, 1); ++ } + } + + void bond_queue_slave_event(struct slave *slave) + { +- struct bonding *bond = slave->bond; +- struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); +- +- if (!nnw) +- return; +- +- dev_hold(slave->dev); +- nnw->dev = slave->dev; +- bond_fill_ifslave(slave, &nnw->bonding_info.slave); +- bond_fill_ifbond(bond, &nnw->bonding_info.master); +- INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); +- +- queue_delayed_work(slave->bond->wq, &nnw->work, 0); ++ queue_delayed_work(slave->bond->wq, &slave->notify_work, 0); + } + + void bond_lower_state_changed(struct slave *slave) +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -139,12 +139,6 @@ struct bond_parm_tbl { + int mode; + }; + +-struct netdev_notify_work { +- struct delayed_work work; +- struct net_device *dev; +- struct netdev_bonding_info bonding_info; +-}; +- + struct slave { + struct net_device *dev; /* first - useful for panic debug */ + struct bonding *bond; /* our master */ +@@ -172,6 +166,7 @@ struct slave { + #ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *np; + #endif ++ struct delayed_work notify_work; + struct kobject kobj; + struct rtnl_link_stats64 slave_stats; + }; diff --git a/queue-4.18/bonding-fix-warning-message.patch b/queue-4.18/bonding-fix-warning-message.patch new file mode 100644 index 00000000000..eb5ad484e4e --- /dev/null +++ b/queue-4.18/bonding-fix-warning-message.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Mahesh Bandewar +Date: Tue, 2 Oct 2018 12:14:34 -0700 +Subject: bonding: fix warning message + +From: Mahesh Bandewar + +[ Upstream commit 0f3b914c9cfcd7bbedd445dc4ac5dd999fa213c2 ] + +RX queue config for bonding master could be different from its slave +device(s). With the commit 6a9e461f6fe4 ("bonding: pass link-local +packets to bonding master also."), the packet is reinjected into stack +with skb->dev as bonding master. 
This potentially triggers the +message: + + "bondX received packet on queue Y, but number of RX queues is Z" + +whenever the queue that packet is received on is higher than the +numrxqueues on bonding master (Y > Z). + +Fixes: 6a9e461f6fe4 ("bonding: pass link-local packets to bonding master also.") +Reported-by: John Sperbeck +Signed-off-by: Eric Dumazet +Signed-off-by: Mahesh Bandewar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1194,6 +1194,7 @@ static rx_handler_result_t bond_handle_f + + if (nskb) { + nskb->dev = bond->dev; ++ nskb->queue_mapping = 0; + netif_rx(nskb); + } + return RX_HANDLER_PASS; diff --git a/queue-4.18/bonding-pass-link-local-packets-to-bonding-master-also.patch b/queue-4.18/bonding-pass-link-local-packets-to-bonding-master-also.patch new file mode 100644 index 00000000000..4c09e693842 --- /dev/null +++ b/queue-4.18/bonding-pass-link-local-packets-to-bonding-master-also.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Mahesh Bandewar +Date: Mon, 24 Sep 2018 14:39:42 -0700 +Subject: bonding: pass link-local packets to bonding master also. + +From: Mahesh Bandewar + +[ Upstream commit 6a9e461f6fe4434e6172304b69774daff9a3ac4c ] + +Commit b89f04c61efe ("bonding: deliver link-local packets with +skb->dev set to link that packets arrived on") changed the behavior +of how link-local-multicast packets are processed. The change in +the behavior broke some legacy use cases where these packets are +expected to arrive on bonding master device also. + +This patch passes the packet to the stack with the link it arrived +on as well as passes to the bonding-master device to preserve the +legacy use case. 
+ +Fixes: b89f04c61efe ("bonding: deliver link-local packets with skb->dev set to link that packets arrived on") +Reported-by: Michal Soltys +Signed-off-by: Mahesh Bandewar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1178,9 +1178,26 @@ static rx_handler_result_t bond_handle_f + } + } + +- /* don't change skb->dev for link-local packets */ +- if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) ++ /* Link-local multicast packets should be passed to the ++ * stack on the link they arrive as well as pass them to the ++ * bond-master device. These packets are mostly usable when ++ * stack receives it with the link on which they arrive ++ * (e.g. LLDP) they also must be available on master. Some of ++ * the use cases include (but are not limited to): LLDP agents ++ * that must be able to operate both on enslaved interfaces as ++ * well as on bonds themselves; linux bridges that must be able ++ * to process/pass BPDUs from attached bonds when any kind of ++ * STP version is enabled on the network. 
++ */ ++ if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) { ++ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); ++ ++ if (nskb) { ++ nskb->dev = bond->dev; ++ netif_rx(nskb); ++ } + return RX_HANDLER_PASS; ++ } + if (bond_should_deliver_exact_match(skb, slave, bond)) + return RX_HANDLER_EXACT; + diff --git a/queue-4.18/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch b/queue-4.18/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch new file mode 100644 index 00000000000..c76613844eb --- /dev/null +++ b/queue-4.18/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch @@ -0,0 +1,101 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Eric Dumazet +Date: Tue, 2 Oct 2018 12:35:05 -0700 +Subject: inet: make sure to grab rcu_read_lock before using ireq->ireq_opt + +From: Eric Dumazet + +[ Upstream commit 2ab2ddd301a22ca3c5f0b743593e4ad2953dfa53 ] + +Timer handlers do not imply rcu_read_lock(), so my recent fix +triggered a LOCKDEP warning when SYNACK is retransmitted. + +Let's add rcu_read_lock()/rcu_read_unlock() pairs around ireq->ireq_opt +usages instead of guessing what is done by callers, since it is +not worth the pain. + +Get rid of ireq_opt_deref() helper since it hides the logic +without real benefit, since it is now a standard rcu_dereference(). + +Fixes: 1ad98e9d1bdf ("tcp/dccp: fix lockdep issue when SYN is backlogged") +Signed-off-by: Eric Dumazet +Reported-by: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 5 ----- + net/dccp/ipv4.c | 4 +++- + net/ipv4/inet_connection_sock.c | 5 ++++- + net/ipv4/tcp_ipv4.c | 4 +++- + 4 files changed, 10 insertions(+), 8 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -130,11 +130,6 @@ static inline int inet_request_bound_dev + return sk->sk_bound_dev_if; + } + +-static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) +-{ +- return rcu_dereference(ireq->ireq_opt); +-} +- + struct inet_cork { + unsigned int flags; + __be32 addr; +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -493,9 +493,11 @@ static int dccp_v4_send_response(const s + + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, + ireq->ir_rmt_addr); ++ rcu_read_lock(); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq_opt_deref(ireq)); ++ rcu_dereference(ireq->ireq_opt)); ++ rcu_read_unlock(); + err = net_xmit_eval(err); + } + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -535,7 +535,8 @@ struct dst_entry *inet_csk_route_req(con + struct ip_options_rcu *opt; + struct rtable *rt; + +- opt = ireq_opt_deref(ireq); ++ rcu_read_lock(); ++ opt = rcu_dereference(ireq->ireq_opt); + + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, +@@ -549,11 +550,13 @@ struct dst_entry *inet_csk_route_req(con + goto no_route; + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) + goto route_err; ++ rcu_read_unlock(); + return &rt->dst; + + route_err: + ip_rt_put(rt); + no_route: ++ rcu_read_unlock(); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); + return NULL; + } +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -942,9 +942,11 @@ static int tcp_v4_send_synack(const stru + if (skb) { + __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + ++ rcu_read_lock(); + err = ip_build_and_send_pkt(skb, sk, 
ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq_opt_deref(ireq)); ++ rcu_dereference(ireq->ireq_opt)); ++ rcu_read_unlock(); + err = net_xmit_eval(err); + } + diff --git a/queue-4.18/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch b/queue-4.18/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch new file mode 100644 index 00000000000..68f949991e1 --- /dev/null +++ b/queue-4.18/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch @@ -0,0 +1,136 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Paolo Abeni +Date: Wed, 19 Sep 2018 15:02:07 +0200 +Subject: ip6_tunnel: be careful when accessing the inner header + +From: Paolo Abeni + +[ Upstream commit 76c0ddd8c3a683f6e2c6e60e11dc1a1558caf4bc ] + +the ip6 tunnel xmit ndo assumes that the processed skb always +contains an ip[v6] header, but syzbot has found a way to send +frames that fall short of this assumption, leading to the following splat: + +BUG: KMSAN: uninit-value in ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 +[inline] +BUG: KMSAN: uninit-value in ip6_tnl_start_xmit+0x7d2/0x1ef0 +net/ipv6/ip6_tunnel.c:1390 +CPU: 0 PID: 4504 Comm: syz-executor558 Not tainted 4.16.0+ #87 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:17 [inline] + dump_stack+0x185/0x1d0 lib/dump_stack.c:53 + kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 + __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 + ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] + ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 + __netdev_start_xmit include/linux/netdevice.h:4066 [inline] + netdev_start_xmit include/linux/netdevice.h:4075 [inline] + xmit_one net/core/dev.c:3026 [inline] + dev_hard_start_xmit+0x5f1/0xc70 net/core/dev.c:3042 + __dev_queue_xmit+0x27ee/0x3520 net/core/dev.c:3557 + dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590 + packet_snd net/packet/af_packet.c:2944 [inline] + packet_sendmsg+0x7c70/0x8a30 
net/packet/af_packet.c:2969 + sock_sendmsg_nosec net/socket.c:630 [inline] + sock_sendmsg net/socket.c:640 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 + __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 + SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 + SyS_sendmmsg+0x63/0x90 net/socket.c:2162 + do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 +RIP: 0033:0x441819 +RSP: 002b:00007ffe58ee8268 EFLAGS: 00000213 ORIG_RAX: 0000000000000133 +RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000441819 +RDX: 0000000000000002 RSI: 0000000020000100 RDI: 0000000000000003 +RBP: 00000000006cd018 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000402510 +R13: 00000000004025a0 R14: 0000000000000000 R15: 0000000000000000 + +Uninit was created at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] + kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 + kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 + kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 + slab_post_alloc_hook mm/slab.h:445 [inline] + slab_alloc_node mm/slub.c:2737 [inline] + __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 + __kmalloc_reserve net/core/skbuff.c:138 [inline] + __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 + alloc_skb include/linux/skbuff.h:984 [inline] + alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234 + sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085 + packet_alloc_skb net/packet/af_packet.c:2803 [inline] + packet_snd net/packet/af_packet.c:2894 [inline] + packet_sendmsg+0x6454/0x8a30 net/packet/af_packet.c:2969 + sock_sendmsg_nosec net/socket.c:630 [inline] + sock_sendmsg net/socket.c:640 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 + __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 + SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 + SyS_sendmmsg+0x63/0x90 net/socket.c:2162 + do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 + 
entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + +This change addresses the issue adding the needed check before +accessing the inner header. + +The ipv4 side of the issue is apparently there since the ipv4 over ipv6 +initial support, and the ipv6 side predates git history. + +Fixes: c4d3efafcc93 ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.") +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzbot+3fde91d4d394747d6db4@syzkaller.appspotmail.com +Tested-by: Alexander Potapenko +Signed-off-by: Paolo Abeni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1226,7 +1226,7 @@ static inline int + ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct ip6_tnl *t = netdev_priv(dev); +- const struct iphdr *iph = ip_hdr(skb); ++ const struct iphdr *iph; + int encap_limit = -1; + struct flowi6 fl6; + __u8 dsfield; +@@ -1234,6 +1234,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + u8 tproto; + int err; + ++ /* ensure we can access the full inner ip header */ ++ if (!pskb_may_pull(skb, sizeof(struct iphdr))) ++ return -1; ++ ++ iph = ip_hdr(skb); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + tproto = READ_ONCE(t->parms.proto); +@@ -1297,7 +1302,7 @@ static inline int + ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct ip6_tnl *t = netdev_priv(dev); +- struct ipv6hdr *ipv6h = ipv6_hdr(skb); ++ struct ipv6hdr *ipv6h; + int encap_limit = -1; + __u16 offset; + struct flowi6 fl6; +@@ -1306,6 +1311,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + u8 tproto; + int err; + ++ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) ++ return -1; ++ ++ ipv6h = ipv6_hdr(skb); + tproto = READ_ONCE(t->parms.proto); + if ((tproto != IPPROTO_IPV6 && tproto != 0) || + ip6_tnl_addr_conflict(t, ipv6h)) diff --git 
a/queue-4.18/ip_tunnel-be-careful-when-accessing-the-inner-header.patch b/queue-4.18/ip_tunnel-be-careful-when-accessing-the-inner-header.patch new file mode 100644 index 00000000000..418fd67d908 --- /dev/null +++ b/queue-4.18/ip_tunnel-be-careful-when-accessing-the-inner-header.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Paolo Abeni +Date: Mon, 24 Sep 2018 15:48:19 +0200 +Subject: ip_tunnel: be careful when accessing the inner header + +From: Paolo Abeni + +[ Upstream commit ccfec9e5cb2d48df5a955b7bf47f7782157d3bc2] + +Cong noted that we need the same checks introduced by commit 76c0ddd8c3a6 +("ip6_tunnel: be careful when accessing the inner header") +even for ipv4 tunnels. + +Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") +Suggested-by: Cong Wang +Signed-off-by: Paolo Abeni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -627,6 +627,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, + const struct iphdr *tnl_params, u8 protocol) + { + struct ip_tunnel *tunnel = netdev_priv(dev); ++ unsigned int inner_nhdr_len = 0; + const struct iphdr *inner_iph; + struct flowi4 fl4; + u8 tos, ttl; +@@ -636,6 +637,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, + __be32 dst; + bool connected; + ++ /* ensure we can access the inner net header, for several users below */ ++ if (skb->protocol == htons(ETH_P_IP)) ++ inner_nhdr_len = sizeof(struct iphdr); ++ else if (skb->protocol == htons(ETH_P_IPV6)) ++ inner_nhdr_len = sizeof(struct ipv6hdr); ++ if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) ++ goto tx_error; ++ + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + connected = (tunnel->parms.iph.daddr != 0); + diff --git a/queue-4.18/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch b/queue-4.18/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch new file mode 100644 
index 00000000000..958f6602473 --- /dev/null +++ b/queue-4.18/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch @@ -0,0 +1,42 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Eric Dumazet +Date: Sun, 30 Sep 2018 11:33:39 -0700 +Subject: ipv4: fix use-after-free in ip_cmsg_recv_dstaddr() + +From: Eric Dumazet + +[ Upstream commit 64199fc0a46ba211362472f7f942f900af9492fd ] + +Caching ip_hdr(skb) before a call to pskb_may_pull() is buggy, +do not do it. + +Fixes: 2efd4fca703a ("ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Reported-by: syzbot +Acked-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_sockglue.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -149,7 +149,6 @@ static void ip_cmsg_recv_security(struct + static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) + { + struct sockaddr_in sin; +- const struct iphdr *iph = ip_hdr(skb); + __be16 *ports; + int end; + +@@ -164,7 +163,7 @@ static void ip_cmsg_recv_dstaddr(struct + ports = (__be16 *)skb_transport_header(skb); + + sin.sin_family = AF_INET; +- sin.sin_addr.s_addr = iph->daddr; ++ sin.sin_addr.s_addr = ip_hdr(skb)->daddr; + sin.sin_port = ports[1]; + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + diff --git a/queue-4.18/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch b/queue-4.18/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch new file mode 100644 index 00000000000..fa9c2196ad1 --- /dev/null +++ b/queue-4.18/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch @@ -0,0 +1,169 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Wei Wang +Date: Thu, 4 Oct 2018 10:12:37 -0700 +Subject: ipv6: take rcu lock in rawv6_send_hdrinc() + +From: Wei Wang + +[ Upstream commit a688caa34beb2fd2a92f1b6d33e40cde433ba160 ] + +In rawv6_send_hdrinc(), in order to avoid an extra dst_hold(), we +directly assign the dst 
to skb and set passed in dst to NULL to avoid +double free. +However, in error case, we free skb and then do stats update with the +dst pointer passed in. This causes use-after-free on the dst. +Fix it by taking rcu read lock right before dst could get released to +make sure dst does not get freed until the stats update is done. +Note: we don't have this issue in ipv4 cause dst is not used for stats +update in v4. + +Syzkaller reported following crash: +BUG: KASAN: use-after-free in rawv6_send_hdrinc net/ipv6/raw.c:692 [inline] +BUG: KASAN: use-after-free in rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921 +Read of size 8 at addr ffff8801d95ba730 by task syz-executor0/32088 + +CPU: 1 PID: 32088 Comm: syz-executor0 Not tainted 4.19.0-rc2+ #93 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 + print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 + rawv6_send_hdrinc net/ipv6/raw.c:692 [inline] + rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921 + inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:631 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 + __sys_sendmsg+0x11d/0x280 net/socket.c:2152 + __do_sys_sendmsg net/socket.c:2161 [inline] + __se_sys_sendmsg net/socket.c:2159 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x457099 +Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f83756edc78 
EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f83756ee6d4 RCX: 0000000000457099 +RDX: 0000000000000000 RSI: 0000000020003840 RDI: 0000000000000004 +RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 00000000004d4b30 R14: 00000000004c90b1 R15: 0000000000000000 + +Allocated by task 32088: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 + kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 + kmem_cache_alloc+0x12e/0x730 mm/slab.c:3554 + dst_alloc+0xbb/0x1d0 net/core/dst.c:105 + ip6_dst_alloc+0x35/0xa0 net/ipv6/route.c:353 + ip6_rt_cache_alloc+0x247/0x7b0 net/ipv6/route.c:1186 + ip6_pol_route+0x8f8/0xd90 net/ipv6/route.c:1895 + ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2093 + fib6_rule_lookup+0x277/0x860 net/ipv6/fib6_rules.c:122 + ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2121 + ip6_route_output include/net/ip6_route.h:88 [inline] + ip6_dst_lookup_tail+0xe27/0x1d60 net/ipv6/ip6_output.c:951 + ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079 + rawv6_sendmsg+0x12d9/0x4630 net/ipv6/raw.c:905 + inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:631 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 + __sys_sendmsg+0x11d/0x280 net/socket.c:2152 + __do_sys_sendmsg net/socket.c:2161 [inline] + __se_sys_sendmsg net/socket.c:2159 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Freed by task 5356: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 + kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 + __cache_free mm/slab.c:3498 [inline] + kmem_cache_free+0x83/0x290 mm/slab.c:3756 + 
dst_destroy+0x267/0x3c0 net/core/dst.c:141 + dst_destroy_rcu+0x16/0x19 net/core/dst.c:154 + __rcu_reclaim kernel/rcu/rcu.h:236 [inline] + rcu_do_batch kernel/rcu/tree.c:2576 [inline] + invoke_rcu_callbacks kernel/rcu/tree.c:2880 [inline] + __rcu_process_callbacks kernel/rcu/tree.c:2847 [inline] + rcu_process_callbacks+0xf23/0x2670 kernel/rcu/tree.c:2864 + __do_softirq+0x30b/0xad8 kernel/softirq.c:292 + +Fixes: 1789a640f556 ("raw: avoid two atomics in xmit") +Signed-off-by: Wei Wang +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/raw.c | 29 ++++++++++++++++++++--------- + 1 file changed, 20 insertions(+), 9 deletions(-) + +--- a/net/ipv6/raw.c ++++ b/net/ipv6/raw.c +@@ -650,8 +650,6 @@ static int rawv6_send_hdrinc(struct sock + skb->protocol = htons(ETH_P_IPV6); + skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; +- skb_dst_set(skb, &rt->dst); +- *dstp = NULL; + + skb_put(skb, length); + skb_reset_network_header(skb); +@@ -664,8 +662,14 @@ static int rawv6_send_hdrinc(struct sock + + skb->transport_header = skb->network_header; + err = memcpy_from_msg(iph, msg, length); +- if (err) +- goto error_fault; ++ if (err) { ++ err = -EFAULT; ++ kfree_skb(skb); ++ goto error; ++ } ++ ++ skb_dst_set(skb, &rt->dst); ++ *dstp = NULL; + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing +@@ -674,21 +678,28 @@ static int rawv6_send_hdrinc(struct sock + if (unlikely(!skb)) + return 0; + ++ /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev ++ * in the error path. Since skb has been freed, the dst could ++ * have been queued for deletion. 
++ */ ++ rcu_read_lock(); + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, rt->dst.dev, dst_output); + if (err > 0) + err = net_xmit_errno(err); +- if (err) +- goto error; ++ if (err) { ++ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); ++ rcu_read_unlock(); ++ goto error_check; ++ } ++ rcu_read_unlock(); + out: + return 0; + +-error_fault: +- err = -EFAULT; +- kfree_skb(skb); + error: + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); ++error_check: + if (err == -ENOBUFS && !np->recverr) + err = 0; + return err; diff --git a/queue-4.18/net-aquantia-memory-corruption-on-jumbo-frames.patch b/queue-4.18/net-aquantia-memory-corruption-on-jumbo-frames.patch new file mode 100644 index 00000000000..b61ebc6f191 --- /dev/null +++ b/queue-4.18/net-aquantia-memory-corruption-on-jumbo-frames.patch @@ -0,0 +1,89 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Friedemann Gerold +Date: Sat, 15 Sep 2018 18:03:39 +0300 +Subject: net: aquantia: memory corruption on jumbo frames + +From: Friedemann Gerold + +[ Upstream commit d26ed6b0e5e23190d43ab34bc69cbecdc464a2cf ] + +This patch fixes skb_shared area, which will be corrupted +upon reception of 4K jumbo packets. + +Originally build_skb usage purpose was to reuse page for skb to eliminate +needs of extra fragments. But that logic does not take into account that +skb_shared_info should be reserved at the end of skb data area. + +In case packet data consumes all the page (4K), skb_shinfo location +overflows the page. As a consequence, __build_skb zeroed shinfo data above +the allocated page, corrupting next page. + +The issue is rarely seen in real life because jumbo are normally larger +than 4K and that causes another code path to trigger. 
+But it 100% reproducible with simple scapy packet, like: + + sendp(IP(dst="192.168.100.3") / TCP(dport=443) \ + / Raw(RandString(size=(4096-40))), iface="enp1s0") + +Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") + +Reported-by: Friedemann Gerold +Reported-by: Michael Rauch +Signed-off-by: Friedemann Gerold +Tested-by: Nikita Danilov +Signed-off-by: Igor Russkikh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 32 ++++++++++++----------- + 1 file changed, 18 insertions(+), 14 deletions(-) + +--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c ++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +@@ -225,9 +225,10 @@ int aq_ring_rx_clean(struct aq_ring_s *s + } + + /* for single fragment packets use build_skb() */ +- if (buff->is_eop) { ++ if (buff->is_eop && ++ buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) { + skb = build_skb(page_address(buff->page), +- buff->len + AQ_SKB_ALIGN); ++ AQ_CFG_RX_FRAME_MAX); + if (unlikely(!skb)) { + err = -ENOMEM; + goto err_exit; +@@ -247,18 +248,21 @@ int aq_ring_rx_clean(struct aq_ring_s *s + buff->len - ETH_HLEN, + SKB_TRUESIZE(buff->len - ETH_HLEN)); + +- for (i = 1U, next_ = buff->next, +- buff_ = &self->buff_ring[next_]; true; +- next_ = buff_->next, +- buff_ = &self->buff_ring[next_], ++i) { +- skb_add_rx_frag(skb, i, buff_->page, 0, +- buff_->len, +- SKB_TRUESIZE(buff->len - +- ETH_HLEN)); +- buff_->is_cleaned = 1; +- +- if (buff_->is_eop) +- break; ++ if (!buff->is_eop) { ++ for (i = 1U, next_ = buff->next, ++ buff_ = &self->buff_ring[next_]; ++ true; next_ = buff_->next, ++ buff_ = &self->buff_ring[next_], ++i) { ++ skb_add_rx_frag(skb, i, ++ buff_->page, 0, ++ buff_->len, ++ SKB_TRUESIZE(buff->len - ++ ETH_HLEN)); ++ buff_->is_cleaned = 1; ++ ++ if (buff_->is_eop) ++ break; ++ } + } + } + diff --git a/queue-4.18/net-dsa-b53-keep-cpu-port-as-tagged-in-all-vlans.patch 
b/queue-4.18/net-dsa-b53-keep-cpu-port-as-tagged-in-all-vlans.patch new file mode 100644 index 00000000000..2542a372621 --- /dev/null +++ b/queue-4.18/net-dsa-b53-keep-cpu-port-as-tagged-in-all-vlans.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Florian Fainelli +Date: Thu, 4 Oct 2018 20:24:13 -0700 +Subject: net: dsa: b53: Keep CPU port as tagged in all VLANs + +From: Florian Fainelli + +[ Upstream commit ca8931948344c485569b04821d1f6bcebccd376b ] + +Commit c499696e7901 ("net: dsa: b53: Stop using dev->cpu_port +incorrectly") was a bit too trigger happy in removing the CPU port from +the VLAN membership because we rely on DSA to program the CPU port VLAN, +which it does, except it does not bother itself with tagged/untagged and +just uses untagged. + +Having the CPU port "follow" the user ports tagged/untagged is not great +and does not allow for properly differentiating, so keep the CPU port +tagged in all VLANs. + +Reported-by: Gerhard Wiesinger +Fixes: c499696e7901 ("net: dsa: b53: Stop using dev->cpu_port incorrectly") +Signed-off-by: Florian Fainelli +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/b53/b53_common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/dsa/b53/b53_common.c ++++ b/drivers/net/dsa/b53/b53_common.c +@@ -1107,7 +1107,7 @@ void b53_vlan_add(struct dsa_switch *ds, + b53_get_vlan_entry(dev, vid, vl); + + vl->members |= BIT(port); +- if (untagged) ++ if (untagged && !dsa_is_cpu_port(ds, port)) + vl->untag |= BIT(port); + else + vl->untag &= ~BIT(port); +@@ -1149,7 +1149,7 @@ int b53_vlan_del(struct dsa_switch *ds, + pvid = 0; + } + +- if (untagged) ++ if (untagged && !dsa_is_cpu_port(ds, port)) + vl->untag &= ~(BIT(port)); + + b53_set_vlan_entry(dev, vid, vl); diff --git a/queue-4.18/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch b/queue-4.18/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch new file mode 100644 index 00000000000..c676963fe1b --- /dev/null +++ b/queue-4.18/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch @@ -0,0 +1,49 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Florian Fainelli +Date: Tue, 9 Oct 2018 16:48:58 -0700 +Subject: net: dsa: bcm_sf2: Call setup during switch resume + +From: Florian Fainelli + +[ Upstream commit 54baca096386d862d19c10f58f34bf787c6b3cbe ] + +There is no reason to open code what the switch setup function does, in +fact, because we just issued a switch reset, we would make all the +register get their default values, including for instance, having unused +port be enabled again and wasting power and leading to an inappropriate +switch core clock being selected. + +Fixes: 8cfa94984c9c ("net: dsa: bcm_sf2: add suspend/resume callbacks") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 10 +--------- + 1 file changed, 1 insertion(+), 9 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -698,7 +698,6 @@ static int bcm_sf2_sw_suspend(struct dsa + static int bcm_sf2_sw_resume(struct dsa_switch *ds) + { + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); +- unsigned int port; + int ret; + + ret = bcm_sf2_sw_rst(priv); +@@ -710,14 +709,7 @@ static int bcm_sf2_sw_resume(struct dsa_ + if (priv->hw_params.num_gphy == 1) + bcm_sf2_gphy_enable_set(ds, true); + +- for (port = 0; port < DSA_MAX_PORTS; port++) { +- if (dsa_is_user_port(ds, port)) +- bcm_sf2_port_setup(ds, port, NULL); +- else if (dsa_is_cpu_port(ds, port)) +- bcm_sf2_imp_setup(ds, port); +- } +- +- bcm_sf2_enable_acb(ds); ++ ds->ops->setup(ds); + + return 0; + } diff --git a/queue-4.18/net-dsa-bcm_sf2-fix-unbind-ordering.patch b/queue-4.18/net-dsa-bcm_sf2-fix-unbind-ordering.patch new file mode 100644 index 00000000000..c29fa440dab --- /dev/null +++ b/queue-4.18/net-dsa-bcm_sf2-fix-unbind-ordering.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Florian Fainelli +Date: Tue, 9 Oct 2018 16:48:57 -0700 +Subject: net: dsa: bcm_sf2: Fix unbind ordering + +From: Florian Fainelli + +[ Upstream commit bf3b452b7af787b8bf27de6490dc4eedf6f97599 ] + +The order in which we release resources is unfortunately leading to bus +errors while dismantling the port. This is because we set +priv->wol_ports_mask to 0 to tell bcm_sf2_sw_suspend() that it is now +permissible to clock gate the switch. Later on, when dsa_slave_destroy() +comes in from dsa_unregister_switch() and calls +dsa_switch_ops::port_disable, we perform the same dismantling again, and +this time we hit registers that are clock gated. + +Make sure that dsa_unregister_switch() is the first thing that happens, +which takes care of releasing all user visible resources, then proceed +with clock gating hardware. 
We still need to set priv->wol_ports_mask to +0 to make sure that an enabled port properly gets disabled in case it +was previously used as part of Wake-on-LAN. + +Fixes: d9338023fb8e ("net: dsa: bcm_sf2: Make it a real platform device driver") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -1160,10 +1160,10 @@ static int bcm_sf2_sw_remove(struct plat + { + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + +- /* Disable all ports and interrupts */ + priv->wol_ports_mask = 0; +- bcm_sf2_sw_suspend(priv->dev->ds); + dsa_unregister_switch(priv->dev->ds); ++ /* Disable all ports and interrupts */ ++ bcm_sf2_sw_suspend(priv->dev->ds); + bcm_sf2_mdio_unregister(priv); + + return 0; diff --git a/queue-4.18/net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch b/queue-4.18/net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch new file mode 100644 index 00000000000..4ab7c646f45 --- /dev/null +++ b/queue-4.18/net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: "Maciej Żenczykowski" +Date: Sat, 22 Sep 2018 01:34:01 -0700 +Subject: net-ethtool: ETHTOOL_GUFO did not and should not require CAP_NET_ADMIN + +From: "Maciej Żenczykowski" + +[ Upstream commit 474ff2600889e16280dbc6ada8bfecb216169a70 ] + +So it should not fail with EPERM even though it is no longer implemented... 
+ +This is a fix for: + (userns)$ egrep ^Cap /proc/self/status + CapInh: 0000003fffffffff + CapPrm: 0000003fffffffff + CapEff: 0000003fffffffff + CapBnd: 0000003fffffffff + CapAmb: 0000003fffffffff + + (userns)$ tcpdump -i usb_rndis0 + tcpdump: WARNING: usb_rndis0: SIOCETHTOOL(ETHTOOL_GUFO) ioctl failed: Operation not permitted + Warning: Kernel filter failed: Bad file descriptor + tcpdump: can't remove kernel filter: Bad file descriptor + +With this change it returns EOPNOTSUPP instead of EPERM. + +See also https://github.com/the-tcpdump-group/libpcap/issues/689 + +Fixes: 08a00fea6de2 "net: Remove references to NETIF_F_UFO from ethtool." +Cc: David S. Miller +Signed-off-by: Maciej Żenczykowski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/ethtool.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/ethtool.c ++++ b/net/core/ethtool.c +@@ -2623,6 +2623,7 @@ int dev_ethtool(struct net *net, struct + case ETHTOOL_GPHYSTATS: + case ETHTOOL_GTSO: + case ETHTOOL_GPERMADDR: ++ case ETHTOOL_GUFO: + case ETHTOOL_GGSO: + case ETHTOOL_GGRO: + case ETHTOOL_GFLAGS: diff --git a/queue-4.18/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch b/queue-4.18/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch new file mode 100644 index 00000000000..6895514c2f4 --- /dev/null +++ b/queue-4.18/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch @@ -0,0 +1,102 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Yunsheng Lin +Date: Tue, 25 Sep 2018 10:21:55 +0100 +Subject: net: hns: fix for unmapping problem when SMMU is on + +From: Yunsheng Lin + +[ Upstream commit 2e9361efa707e186d91b938e44f9e326725259f7 ] + +If SMMU is on, there is more likely that skb_shinfo(skb)->frags[i] +can not send by a single BD. when this happen, the +hns_nic_net_xmit_hw function map the whole data in a frags using +skb_frag_dma_map, but unmap each BD' data individually when tx is +done, which causes problem when SMMU is on. 
+ +This patch fixes this problem by unmapping the whole data in a +frags when tx is done. + +Signed-off-by: Yunsheng Lin +Signed-off-by: Peng Li +Reviewed-by: Yisen Zhuang +Signed-off-by: Salil Mehta +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/hisilicon/hns/hnae.c | 2 - + drivers/net/ethernet/hisilicon/hns/hns_enet.c | 30 ++++++++++++++++---------- + 2 files changed, 20 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/hisilicon/hns/hnae.c ++++ b/drivers/net/ethernet/hisilicon/hns/hnae.c +@@ -84,7 +84,7 @@ static void hnae_unmap_buffer(struct hna + if (cb->type == DESC_TYPE_SKB) + dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, + ring_to_dma_dir(ring)); +- else ++ else if (cb->length) + dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, + ring_to_dma_dir(ring)); + } +--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c +@@ -40,9 +40,9 @@ + #define SKB_TMP_LEN(SKB) \ + (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB)) + +-static void fill_v2_desc(struct hnae_ring *ring, void *priv, +- int size, dma_addr_t dma, int frag_end, +- int buf_num, enum hns_desc_type type, int mtu) ++static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size, ++ int send_sz, dma_addr_t dma, int frag_end, ++ int buf_num, enum hns_desc_type type, int mtu) + { + struct hnae_desc *desc = &ring->desc[ring->next_to_use]; + struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; +@@ -64,7 +64,7 @@ static void fill_v2_desc(struct hnae_rin + desc_cb->type = type; + + desc->addr = cpu_to_le64(dma); +- desc->tx.send_size = cpu_to_le16((u16)size); ++ desc->tx.send_size = cpu_to_le16((u16)send_sz); + + /* config bd buffer end */ + hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); +@@ -133,6 +133,14 @@ static void fill_v2_desc(struct hnae_rin + ring_ptr_move_fw(ring, next_to_use); + } + ++static void fill_v2_desc(struct hnae_ring *ring, void *priv,
++ int size, dma_addr_t dma, int frag_end, ++ int buf_num, enum hns_desc_type type, int mtu) ++{ ++ fill_v2_desc_hw(ring, priv, size, size, dma, frag_end, ++ buf_num, type, mtu); ++} ++ + static const struct acpi_device_id hns_enet_acpi_match[] = { + { "HISI00C1", 0 }, + { "HISI00C2", 0 }, +@@ -289,15 +297,15 @@ static void fill_tso_desc(struct hnae_ri + + /* when the frag size is bigger than hardware, split this frag */ + for (k = 0; k < frag_buf_num; k++) +- fill_v2_desc(ring, priv, +- (k == frag_buf_num - 1) ? ++ fill_v2_desc_hw(ring, priv, k == 0 ? size : 0, ++ (k == frag_buf_num - 1) ? + sizeoflast : BD_MAX_SEND_SIZE, +- dma + BD_MAX_SEND_SIZE * k, +- frag_end && (k == frag_buf_num - 1) ? 1 : 0, +- buf_num, +- (type == DESC_TYPE_SKB && !k) ? ++ dma + BD_MAX_SEND_SIZE * k, ++ frag_end && (k == frag_buf_num - 1) ? 1 : 0, ++ buf_num, ++ (type == DESC_TYPE_SKB && !k) ? + DESC_TYPE_SKB : DESC_TYPE_PAGE, +- mtu); ++ mtu); + } + + netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, diff --git a/queue-4.18/net-ipv4-don-t-let-pmtu-updates-increase-route-mtu.patch b/queue-4.18/net-ipv4-don-t-let-pmtu-updates-increase-route-mtu.patch new file mode 100644 index 00000000000..7cb1e000e09 --- /dev/null +++ b/queue-4.18/net-ipv4-don-t-let-pmtu-updates-increase-route-mtu.patch @@ -0,0 +1,58 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Sabrina Dubroca +Date: Tue, 9 Oct 2018 17:48:15 +0200 +Subject: net: ipv4: don't let PMTU updates increase route MTU + +From: Sabrina Dubroca + +[ Upstream commit 28d35bcdd3925e7293408cdb8aa5f2aac5f0d6e3 ] + +When an MTU update with PMTU smaller than net.ipv4.route.min_pmtu is +received, we must clamp its value. However, we can receive a PMTU +exception with PMTU < old_mtu < ip_rt_min_pmtu, which would lead to an +increase in PMTU. + +To fix this, take the smallest of the old MTU and ip_rt_min_pmtu. + +Before this patch, in case of an update, the exception's MTU would +always change. 
Now, an exception can have only its lock flag updated, +but not the MTU, so we need to add a check on locking to the following +"is this exception getting updated, or close to expiring?" test. + +Fixes: d52e5a7e7ca4 ("ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Stefano Brivio +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1001,21 +1001,22 @@ out: kfree_skb(skb); + static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) + { + struct dst_entry *dst = &rt->dst; ++ u32 old_mtu = ipv4_mtu(dst); + struct fib_result res; + bool lock = false; + + if (ip_mtu_locked(dst)) + return; + +- if (ipv4_mtu(dst) < mtu) ++ if (old_mtu < mtu) + return; + + if (mtu < ip_rt_min_pmtu) { + lock = true; +- mtu = ip_rt_min_pmtu; ++ mtu = min(old_mtu, ip_rt_min_pmtu); + } + +- if (rt->rt_pmtu == mtu && ++ if (rt->rt_pmtu == mtu && !lock && + time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) + return; + diff --git a/queue-4.18/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch b/queue-4.18/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch new file mode 100644 index 00000000000..c2ef1e42d3a --- /dev/null +++ b/queue-4.18/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch @@ -0,0 +1,218 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Sabrina Dubroca +Date: Tue, 9 Oct 2018 17:48:14 +0200 +Subject: net: ipv4: update fnhe_pmtu when first hop's MTU changes + +From: Sabrina Dubroca + +[ Upstream commit af7d6cce53694a88d6a1bb60c9a239a6a5144459 ] + +Since commit 5aad1de5ea2c ("ipv4: use separate genid for next hop +exceptions"), exceptions get deprecated separately from cached +routes. In particular, administrative changes don't clear PMTU anymore. 
+ +As Stefano described in commit e9fa1495d738 ("ipv6: Reflect MTU changes +on PMTU of exceptions for MTU-less routes"), the PMTU discovered before +the local MTU change can become stale: + - if the local MTU is now lower than the PMTU, that PMTU is now + incorrect + - if the local MTU was the lowest value in the path, and is increased, + we might discover a higher PMTU + +Similarly to what commit e9fa1495d738 did for IPv6, update PMTU in those +cases. + +If the exception was locked, the discovered PMTU was smaller than the +minimal accepted PMTU. In that case, if the new local MTU is smaller +than the current PMTU, let PMTU discovery figure out if locking of the +exception is still needed. + +To do this, we need to know the old link MTU in the NETDEV_CHANGEMTU +notifier. By the time the notifier is called, dev->mtu has been +changed. This patch adds the old MTU as additional information in the +notifier structure, and a new call_netdevice_notifiers_u32() function. + +Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Stefano Brivio +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 7 ++++++ + include/net/ip_fib.h | 1 + net/core/dev.c | 28 +++++++++++++++++++++++-- + net/ipv4/fib_frontend.c | 12 +++++++---- + net/ipv4/fib_semantics.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++ + 5 files changed, 92 insertions(+), 6 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -2420,6 +2420,13 @@ struct netdev_notifier_info { + struct netlink_ext_ack *extack; + }; + ++struct netdev_notifier_info_ext { ++ struct netdev_notifier_info info; /* must be first */ ++ union { ++ u32 mtu; ++ } ext; ++}; ++ + struct netdev_notifier_change_info { + struct netdev_notifier_info info; /* must be first */ + unsigned int flags_changed; +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -394,6 +394,7 @@ int ip_fib_check_default(__be32 gw, stru + int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); + int fib_sync_down_addr(struct net_device *dev, __be32 local); + int fib_sync_up(struct net_device *dev, unsigned int nh_flags); ++void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); + + #ifdef CONFIG_IP_ROUTE_MULTIPATH + int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1754,6 +1754,28 @@ int call_netdevice_notifiers(unsigned lo + } + EXPORT_SYMBOL(call_netdevice_notifiers); + ++/** ++ * call_netdevice_notifiers_mtu - call all network notifier blocks ++ * @val: value passed unmodified to notifier function ++ * @dev: net_device pointer passed unmodified to notifier function ++ * @arg: additional u32 argument passed to the notifier function ++ * ++ * Call all network notifier blocks. Parameters and return value ++ * are as for raw_notifier_call_chain(). 
++ */ ++static int call_netdevice_notifiers_mtu(unsigned long val, ++ struct net_device *dev, u32 arg) ++{ ++ struct netdev_notifier_info_ext info = { ++ .info.dev = dev, ++ .ext.mtu = arg, ++ }; ++ ++ BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); ++ ++ return call_netdevice_notifiers_info(val, &info.info); ++} ++ + #ifdef CONFIG_NET_INGRESS + static DEFINE_STATIC_KEY_FALSE(ingress_needed_key); + +@@ -7118,14 +7140,16 @@ int dev_set_mtu(struct net_device *dev, + err = __dev_set_mtu(dev, new_mtu); + + if (!err) { +- err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); ++ err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, ++ orig_mtu); + err = notifier_to_errno(err); + if (err) { + /* setting mtu back and notifying everyone again, + * so that they have a chance to revert changes. + */ + __dev_set_mtu(dev, orig_mtu); +- call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); ++ call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, ++ new_mtu); + } + } + return err; +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -1243,7 +1243,8 @@ static int fib_inetaddr_event(struct not + static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) + { + struct net_device *dev = netdev_notifier_info_to_dev(ptr); +- struct netdev_notifier_changeupper_info *info; ++ struct netdev_notifier_changeupper_info *upper_info = ptr; ++ struct netdev_notifier_info_ext *info_ext = ptr; + struct in_device *in_dev; + struct net *net = dev_net(dev); + unsigned int flags; +@@ -1278,16 +1279,19 @@ static int fib_netdev_event(struct notif + fib_sync_up(dev, RTNH_F_LINKDOWN); + else + fib_sync_down_dev(dev, event, false); +- /* fall through */ ++ rt_cache_flush(net); ++ break; + case NETDEV_CHANGEMTU: ++ fib_sync_mtu(dev, info_ext->ext.mtu); + rt_cache_flush(net); + break; + case NETDEV_CHANGEUPPER: +- info = ptr; ++ upper_info = ptr; + /* flush all routes if dev is linked to or unlinked from + * an L3 master device (e.g., VRF) + */ +- 
if (info->upper_dev && netif_is_l3_master(info->upper_dev)) ++ if (upper_info->upper_dev && ++ netif_is_l3_master(upper_info->upper_dev)) + fib_disable_ip(dev, NETDEV_DOWN, true); + break; + } +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1470,6 +1470,56 @@ static int call_fib_nh_notifiers(struct + return NOTIFY_DONE; + } + ++/* Update the PMTU of exceptions when: ++ * - the new MTU of the first hop becomes smaller than the PMTU ++ * - the old MTU was the same as the PMTU, and it limited discovery of ++ * larger MTUs on the path. With that limit raised, we can now ++ * discover larger MTUs ++ * A special case is locked exceptions, for which the PMTU is smaller ++ * than the minimal accepted PMTU: ++ * - if the new MTU is greater than the PMTU, don't make any change ++ * - otherwise, unlock and set PMTU ++ */ ++static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) ++{ ++ struct fnhe_hash_bucket *bucket; ++ int i; ++ ++ bucket = rcu_dereference_protected(nh->nh_exceptions, 1); ++ if (!bucket) ++ return; ++ ++ for (i = 0; i < FNHE_HASH_SIZE; i++) { ++ struct fib_nh_exception *fnhe; ++ ++ for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); ++ fnhe; ++ fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { ++ if (fnhe->fnhe_mtu_locked) { ++ if (new <= fnhe->fnhe_pmtu) { ++ fnhe->fnhe_pmtu = new; ++ fnhe->fnhe_mtu_locked = false; ++ } ++ } else if (new < fnhe->fnhe_pmtu || ++ orig == fnhe->fnhe_pmtu) { ++ fnhe->fnhe_pmtu = new; ++ } ++ } ++ } ++} ++ ++void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) ++{ ++ unsigned int hash = fib_devindex_hashfn(dev->ifindex); ++ struct hlist_head *head = &fib_info_devhash[hash]; ++ struct fib_nh *nh; ++ ++ hlist_for_each_entry(nh, head, nh_hash) { ++ if (nh->nh_dev == dev) ++ nh_update_mtu(nh, dev->mtu, orig_mtu); ++ } ++} ++ + /* Event force Flags Description + * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host + * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host diff 
--git a/queue-4.18/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch b/queue-4.18/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch new file mode 100644 index 00000000000..72cf48d7239 --- /dev/null +++ b/queue-4.18/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch @@ -0,0 +1,61 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Jeff Barnhill <0xeffeff@gmail.com> +Date: Fri, 21 Sep 2018 00:45:27 +0000 +Subject: net/ipv6: Display all addresses in output of /proc/net/if_inet6 + +From: Jeff Barnhill <0xeffeff@gmail.com> + +[ Upstream commit 86f9bd1ff61c413a2a251fa736463295e4e24733 ] + +The backend handling for /proc/net/if_inet6 in addrconf.c doesn't properly +handle starting/stopping the iteration. The problem is that at some point +during the iteration, an overflow is detected and the process is +subsequently stopped. The item being shown via seq_printf() when the +overflow occurs is not actually shown, though. When start() is +subsequently called to resume iterating, it returns the next item, and +thus the item that was being processed when the overflow occurred never +gets printed. + +Alter the meaning of the private data member "offset". Currently, when it +is not 0 (which only happens at the very beginning), "offset" represents +the next hlist item to be printed. After this change, "offset" always +represents the current item. + +This is also consistent with the private data member "bucket", which +represents the current bucket, and also the use of "pos" as defined in +seq_file.txt: + The pos passed to start() will always be either zero, or the most + recent pos used in the previous session. + +Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -4203,7 +4203,6 @@ static struct inet6_ifaddr *if6_get_firs + p++; + continue; + } +- state->offset++; + return ifa; + } + +@@ -4227,13 +4226,12 @@ static struct inet6_ifaddr *if6_get_next + return ifa; + } + ++ state->offset = 0; + while (++state->bucket < IN6_ADDR_HSIZE) { +- state->offset = 0; + hlist_for_each_entry_rcu(ifa, + &inet6_addr_lst[state->bucket], addr_lst) { + if (!net_eq(dev_net(ifa->idev->dev), net)) + continue; +- state->offset++; + return ifa; + } + } diff --git a/queue-4.18/net-ipv6-remove-extra-call-to-ip6_convert_metrics-for-multipath-case.patch b/queue-4.18/net-ipv6-remove-extra-call-to-ip6_convert_metrics-for-multipath-case.patch new file mode 100644 index 00000000000..b30ff7c352b --- /dev/null +++ b/queue-4.18/net-ipv6-remove-extra-call-to-ip6_convert_metrics-for-multipath-case.patch @@ -0,0 +1,37 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: David Ahern +Date: Wed, 26 Sep 2018 17:35:14 -0700 +Subject: net/ipv6: Remove extra call to ip6_convert_metrics for multipath case + +From: David Ahern + +[ Upstream commit 36f19d5b4f99fa9fa8263877e5f8e669d7fddc14 ] + +The change to move metrics from the dst to rt6_info moved the call +to ip6_convert_metrics from ip6_route_add to ip6_route_info_create. In +doing so it makes the call in ip6_route_info_append redundant and +actually leaks the metrics installed as part of the ip6_route_info_create. +Remove the now unnecessary call. + +Fixes: d4ead6b34b67f ("net/ipv6: move metrics from dst to rt6_info") +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 5 ----- + 1 file changed, 5 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -4314,11 +4314,6 @@ static int ip6_route_info_append(struct + if (!nh) + return -ENOMEM; + nh->fib6_info = rt; +- err = ip6_convert_metrics(net, rt, r_cfg); +- if (err) { +- kfree(nh); +- return err; +- } + memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); + list_add_tail(&nh->next, rt6_nh_list); + diff --git a/queue-4.18/net-ipv6-stop-leaking-percpu-memory-in-fib6-info.patch b/queue-4.18/net-ipv6-stop-leaking-percpu-memory-in-fib6-info.patch new file mode 100644 index 00000000000..a38df75cda3 --- /dev/null +++ b/queue-4.18/net-ipv6-stop-leaking-percpu-memory-in-fib6-info.patch @@ -0,0 +1,32 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Mike Rapoport +Date: Tue, 9 Oct 2018 07:02:01 +0300 +Subject: net/ipv6: stop leaking percpu memory in fib6 info + +From: Mike Rapoport + +[ Upstream commit 7abab7b9b498650404800a08765f44929fee8f31 ] + +The fib6_info_alloc() function allocates percpu memory to hold per CPU +pointers to rt6_info, but this memory is never freed. Fix it. + +Fixes: a64efe142f5e ("net/ipv6: introduce fib6_info struct and helpers") +Signed-off-by: Mike Rapoport +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_fib.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -196,6 +196,8 @@ void fib6_info_destroy_rcu(struct rcu_he + *ppcpu_rt = NULL; + } + } ++ ++ free_percpu(f6i->rt6i_pcpu); + } + + lwtstate_put(f6i->fib6_nh.nh_lwtstate); diff --git a/queue-4.18/net-mlx5-check-for-sq-and-not-rq-state-when-modifying-hairpin-sq.patch b/queue-4.18/net-mlx5-check-for-sq-and-not-rq-state-when-modifying-hairpin-sq.patch new file mode 100644 index 00000000000..0a5fefa6bb0 --- /dev/null +++ b/queue-4.18/net-mlx5-check-for-sq-and-not-rq-state-when-modifying-hairpin-sq.patch @@ -0,0 +1,39 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Alaa Hleihel +Date: Mon, 3 Sep 2018 10:38:14 +0300 +Subject: net/mlx5: Check for SQ and not RQ state when modifying hairpin SQ + +From: Alaa Hleihel + +[ Upstream commit 6b359d5550a1ae7a1269c9dc1dd73dfdc4d6fe58 ] + +When modifying hairpin SQ, instead of checking if the next state equals +to MLX5_SQC_STATE_RDY, we compare it against the MLX5_RQC_STATE_RDY enum +value. + +The code worked since both of MLX5_RQC_STATE_RDY and MLX5_SQC_STATE_RDY +have the same value today. + +This patch fixes this issue. 
+ +Fixes: 18e568c390c6 ("net/mlx5: Hairpin pair core object setup") +Change-Id: I6758aa7b4bd137966ae28206b70648c5bc223b46 +Signed-off-by: Alaa Hleihel +Reviewed-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +@@ -509,7 +509,7 @@ static int mlx5_hairpin_modify_sq(struct + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + +- if (next_state == MLX5_RQC_STATE_RDY) { ++ if (next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET(sqc, sqc, hairpin_peer_rq, peer_rq); + MLX5_SET(sqc, sqc, hairpin_peer_vhca, peer_vhca); + } diff --git a/queue-4.18/net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch b/queue-4.18/net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch new file mode 100644 index 00000000000..9ed707f60c1 --- /dev/null +++ b/queue-4.18/net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Eran Ben Elisha +Date: Sun, 16 Sep 2018 14:45:27 +0300 +Subject: net/mlx5: E-Switch, Fix out of bound access when setting vport rate + +From: Eran Ben Elisha + +[ Upstream commit 11aa5800ed66ed0415b7509f02881c76417d212a ] + +The code that deals with eswitch vport bw guarantee was going beyond the +eswitch vport array limit, fix that. This was pointed out by the kernel +address sanitizer (KASAN). 
+ +The error from KASAN log: +[2018-09-15 15:04:45] BUG: KASAN: slab-out-of-bounds in +mlx5_eswitch_set_vport_rate+0x8c1/0xae0 [mlx5_core] + +Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate") +Signed-off-by: Eran Ben Elisha +Reviewed-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +@@ -2000,7 +2000,7 @@ static u32 calculate_vports_min_rate_div + u32 max_guarantee = 0; + int i; + +- for (i = 0; i <= esw->total_vports; i++) { ++ for (i = 0; i < esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled || evport->info.min_rate < max_guarantee) + continue; +@@ -2020,7 +2020,7 @@ static int normalize_vports_min_rate(str + int err; + int i; + +- for (i = 0; i <= esw->total_vports; i++) { ++ for (i = 0; i < esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled) + continue; diff --git a/queue-4.18/net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch b/queue-4.18/net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch new file mode 100644 index 00000000000..334176a3b81 --- /dev/null +++ b/queue-4.18/net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch @@ -0,0 +1,38 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Jianbo Liu +Date: Sat, 25 Aug 2018 03:29:58 +0000 +Subject: net/mlx5e: Set vlan masks for all offloaded TC rules + +From: Jianbo Liu + +[ Upstream commit cee26487620bc9bc3c7db21b6984d91f7bae12ae ] + +In flow steering, if asked to, the hardware matches on the first ethertype +which is not vlan. It's possible to set a rule as follows, which is meant +to match on untagged packet, but will match on a vlan packet: + tc filter add dev eth0 parent ffff: protocol ip flower ... 
+ +To avoid this for packets with single tag, we set vlan masks to tell +hardware to check the tags for every matched packet. + +Fixes: 095b6cfd69ce ('net/mlx5e: Add TC vlan match parsing') +Signed-off-by: Jianbo Liu +Reviewed-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -1339,6 +1339,9 @@ static int __parse_cls_flower(struct mlx + + *match_level = MLX5_MATCH_L2; + } ++ } else { ++ MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); ++ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { diff --git a/queue-4.18/net-mscc-fix-the-frame-extraction-into-the-skb.patch b/queue-4.18/net-mscc-fix-the-frame-extraction-into-the-skb.patch new file mode 100644 index 00000000000..d8e4cca7461 --- /dev/null +++ b/queue-4.18/net-mscc-fix-the-frame-extraction-into-the-skb.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Antoine Tenart +Date: Thu, 20 Sep 2018 12:08:54 +0200 +Subject: net: mscc: fix the frame extraction into the skb + +From: Antoine Tenart + +[ Upstream commit 652ef42c134da1bbb03bd4c9b4291dfaf8d7febb ] + +When extracting frames from the Ocelot switch, the frame check sequence +(FCS) is present at the end of the data extracted. The FCS was put into +the sk buffer which introduced some issues (as length related ones), as +the FCS shouldn't be part of an Rx sk buffer. + +This patch fixes the Ocelot switch extraction behaviour by discarding +the FCS. + +Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support") +Signed-off-by: Antoine Tenart +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mscc/ocelot_board.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/mscc/ocelot_board.c ++++ b/drivers/net/ethernet/mscc/ocelot_board.c +@@ -91,7 +91,7 @@ static irqreturn_t ocelot_xtr_irq_handle + struct sk_buff *skb; + struct net_device *dev; + u32 *buf; +- int sz, len; ++ int sz, len, buf_len; + u32 ifh[4]; + u32 val; + struct frame_info info; +@@ -116,14 +116,20 @@ static irqreturn_t ocelot_xtr_irq_handle + err = -ENOMEM; + break; + } +- buf = (u32 *)skb_put(skb, info.len); ++ buf_len = info.len - ETH_FCS_LEN; ++ buf = (u32 *)skb_put(skb, buf_len); + + len = 0; + do { + sz = ocelot_rx_frame_word(ocelot, grp, false, &val); + *buf++ = val; + len += sz; +- } while ((sz == 4) && (len < info.len)); ++ } while (len < buf_len); ++ ++ /* Read the FCS and discard it */ ++ sz = ocelot_rx_frame_word(ocelot, grp, false, &val); ++ /* Update the statistics if part of the FCS was read before */ ++ len -= ETH_FCS_LEN - sz; + + if (sz < 0) { + err = sz; diff --git a/queue-4.18/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch b/queue-4.18/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch new file mode 100644 index 00000000000..07cbc993a31 --- /dev/null +++ b/queue-4.18/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch @@ -0,0 +1,66 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Maxime Chevallier +Date: Fri, 5 Oct 2018 09:04:40 +0200 +Subject: net: mvpp2: Extract the correct ethtype from the skb for tx csum offload + +From: Maxime Chevallier + +[ Upstream commit 35f3625c21852ad839f20c91c7d81c4c1101e207 ] + +When offloading the L3 and L4 csum computation on TX, we need to extract +the l3_proto from the ethtype, independently of the presence of a vlan +tag. 
+ +The actual driver uses skb->protocol as-is, resulting in packets with +the wrong L4 checksum being sent when there's a vlan tag in the packet +header and checksum offloading is enabled. + +This commit makes use of vlan_protocol_get() to get the correct ethtype +regardless the presence of a vlan tag. + +Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") +Signed-off-by: Maxime Chevallier +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c ++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +@@ -1725,7 +1725,7 @@ static void mvpp2_txq_desc_put(struct mv + } + + /* Set Tx descriptors fields relevant for CSUM calculation */ +-static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto, ++static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto, + int ip_hdr_len, int l4_proto) + { + u32 command; +@@ -2600,14 +2600,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp + if (skb->ip_summed == CHECKSUM_PARTIAL) { + int ip_hdr_len = 0; + u8 l4_proto; ++ __be16 l3_proto = vlan_get_protocol(skb); + +- if (skb->protocol == htons(ETH_P_IP)) { ++ if (l3_proto == htons(ETH_P_IP)) { + struct iphdr *ip4h = ip_hdr(skb); + + /* Calculate IPv4 checksum and L4 checksum */ + ip_hdr_len = ip4h->ihl; + l4_proto = ip4h->protocol; +- } else if (skb->protocol == htons(ETH_P_IPV6)) { ++ } else if (l3_proto == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = ipv6_hdr(skb); + + /* Read l4_protocol from one of IPv6 extra headers */ +@@ -2619,7 +2620,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp + } + + return mvpp2_txq_desc_csum(skb_network_offset(skb), +- skb->protocol, ip_hdr_len, l4_proto); ++ l3_proto, ip_hdr_len, l4_proto); + } + + return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE; diff --git a/queue-4.18/net-mvpp2-fix-a-txq_done-race-condition.patch 
b/queue-4.18/net-mvpp2-fix-a-txq_done-race-condition.patch new file mode 100644 index 00000000000..747dffaa8b1 --- /dev/null +++ b/queue-4.18/net-mvpp2-fix-a-txq_done-race-condition.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Antoine Tenart +Date: Tue, 18 Sep 2018 16:58:47 +0200 +Subject: net: mvpp2: fix a txq_done race condition + +From: Antoine Tenart + +[ Upstream commit 774268f3e51b53ed432a1ec516574fd5ba469398 ] + +When no Tx IRQ is available, the txq_done() routine (called from +tx_done()) shouldn't be called from the polling function, as in such +case it is already called in the Tx path thanks to an hrtimer. This +mostly occurred when using PPv2.1, as the engine then do not have Tx +IRQs. + +Fixes: edc660fa09e2 ("net: mvpp2: replace TX coalescing interrupts with hrtimer") +Reported-by: Stefan Chulski +Signed-off-by: Antoine Tenart +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c ++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +@@ -3056,10 +3056,12 @@ static int mvpp2_poll(struct napi_struct + cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK); + } + +- cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; +- if (cause_tx) { +- cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET; +- mvpp2_tx_done(port, cause_tx, qv->sw_thread_id); ++ if (port->has_tx_irqs) { ++ cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; ++ if (cause_tx) { ++ cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET; ++ mvpp2_tx_done(port, cause_tx, qv->sw_thread_id); ++ } + } + + /* Process RX packets */ diff --git a/queue-4.18/net-packet-fix-packet-drop-as-of-virtio-gso.patch b/queue-4.18/net-packet-fix-packet-drop-as-of-virtio-gso.patch new file mode 100644 index 00000000000..8b1e8ed8da5 --- /dev/null +++ 
b/queue-4.18/net-packet-fix-packet-drop-as-of-virtio-gso.patch @@ -0,0 +1,79 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Jianfeng Tan +Date: Sat, 29 Sep 2018 15:41:27 +0000 +Subject: net/packet: fix packet drop as of virtio gso + +From: Jianfeng Tan + +[ Upstream commit 9d2f67e43b73e8af7438be219b66a5de0cfa8bd9 ] + +When we use raw socket as the vhost backend, a packet from virito with +gso offloading information, cannot be sent out in later validaton at +xmit path, as we did not set correct skb->protocol which is further used +for looking up the gso function. + +To fix this, we set this field according to virito hdr information. + +Fixes: e858fae2b0b8f4 ("virtio_net: use common code for virtio_net_hdr and skb GSO conversion") +Signed-off-by: Jianfeng Tan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/virtio_net.h | 18 ++++++++++++++++++ + net/packet/af_packet.c | 11 +++++++---- + 2 files changed, 25 insertions(+), 4 deletions(-) + +--- a/include/linux/virtio_net.h ++++ b/include/linux/virtio_net.h +@@ -5,6 +5,24 @@ + #include + #include + ++static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, ++ const struct virtio_net_hdr *hdr) ++{ ++ switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { ++ case VIRTIO_NET_HDR_GSO_TCPV4: ++ case VIRTIO_NET_HDR_GSO_UDP: ++ skb->protocol = cpu_to_be16(ETH_P_IP); ++ break; ++ case VIRTIO_NET_HDR_GSO_TCPV6: ++ skb->protocol = cpu_to_be16(ETH_P_IPV6); ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ + static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, + const struct virtio_net_hdr *hdr, + bool little_endian) +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2712,10 +2712,12 @@ tpacket_error: + } + } + +- if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr, +- vio_le())) { +- tp_len = -EINVAL; +- goto tpacket_error; ++ if (po->has_vnet_hdr) { ++ if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { ++ tp_len = -EINVAL; ++ goto 
tpacket_error; ++ } ++ virtio_net_hdr_set_proto(skb, vnet_hdr); + } + + skb->destructor = tpacket_destruct_skb; +@@ -2911,6 +2913,7 @@ static int packet_snd(struct socket *soc + if (err) + goto out_free; + len += sizeof(vnet_hdr); ++ virtio_net_hdr_set_proto(skb, &vnet_hdr); + } + + skb_probe_transport_header(skb, reserve); diff --git a/queue-4.18/net-phy-phylink-fix-sfp-interface-autodetection.patch b/queue-4.18/net-phy-phylink-fix-sfp-interface-autodetection.patch new file mode 100644 index 00000000000..66964a39708 --- /dev/null +++ b/queue-4.18/net-phy-phylink-fix-sfp-interface-autodetection.patch @@ -0,0 +1,106 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Baruch Siach +Date: Wed, 3 Oct 2018 19:04:49 +0300 +Subject: net: phy: phylink: fix SFP interface autodetection + +From: Baruch Siach + +[ Upstream commit 7e4183752735deb7543e179a44f4f4b44917cd6f ] + +When connecting SFP PHY to phylink use the detected interface. +Otherwise, the link fails to come up when the configured 'phy-mode' +differs from the SFP detected mode. + +Move most of phylink_connect_phy() into __phylink_connect_phy(), and +leave phylink_connect_phy() as a wrapper. phylink_sfp_connect_phy() can +now pass the SFP detected PHY interface to __phylink_connect_phy(). + +This fixes 1GB SFP module link up on eth3 of the Macchiatobin board that +is configured in the DT to "2500base-x" phy-mode. + +Fixes: 9525ae83959b6 ("phylink: add phylink infrastructure") +Suggested-by: Russell King +Signed-off-by: Baruch Siach +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phylink.c | 48 ++++++++++++++++++++++++++-------------------- + 1 file changed, 28 insertions(+), 20 deletions(-) + +--- a/drivers/net/phy/phylink.c ++++ b/drivers/net/phy/phylink.c +@@ -717,6 +717,30 @@ static int phylink_bringup_phy(struct ph + return 0; + } + ++static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy, ++ phy_interface_t interface) ++{ ++ int ret; ++ ++ if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED || ++ (pl->link_an_mode == MLO_AN_INBAND && ++ phy_interface_mode_is_8023z(interface)))) ++ return -EINVAL; ++ ++ if (pl->phydev) ++ return -EBUSY; ++ ++ ret = phy_attach_direct(pl->netdev, phy, 0, interface); ++ if (ret) ++ return ret; ++ ++ ret = phylink_bringup_phy(pl, phy); ++ if (ret) ++ phy_detach(phy); ++ ++ return ret; ++} ++ + /** + * phylink_connect_phy() - connect a PHY to the phylink instance + * @pl: a pointer to a &struct phylink returned from phylink_create() +@@ -734,31 +758,13 @@ static int phylink_bringup_phy(struct ph + */ + int phylink_connect_phy(struct phylink *pl, struct phy_device *phy) + { +- int ret; +- +- if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED || +- (pl->link_an_mode == MLO_AN_INBAND && +- phy_interface_mode_is_8023z(pl->link_interface)))) +- return -EINVAL; +- +- if (pl->phydev) +- return -EBUSY; +- + /* Use PHY device/driver interface */ + if (pl->link_interface == PHY_INTERFACE_MODE_NA) { + pl->link_interface = phy->interface; + pl->link_config.interface = pl->link_interface; + } + +- ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface); +- if (ret) +- return ret; +- +- ret = phylink_bringup_phy(pl, phy); +- if (ret) +- phy_detach(phy); +- +- return ret; ++ return __phylink_connect_phy(pl, phy, pl->link_interface); + } + EXPORT_SYMBOL_GPL(phylink_connect_phy); + +@@ -1672,7 +1678,9 @@ static void phylink_sfp_link_up(void *up + + static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) + { +- return 
phylink_connect_phy(upstream, phy); ++ struct phylink *pl = upstream; ++ ++ return __phylink_connect_phy(upstream, phy, pl->link_config.interface); + } + + static void phylink_sfp_disconnect_phy(void *upstream) diff --git a/queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-receive-path.patch b/queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-receive-path.patch new file mode 100644 index 00000000000..a847a6a05a2 --- /dev/null +++ b/queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-receive-path.patch @@ -0,0 +1,49 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Subash Abhinov Kasiviswanathan +Date: Tue, 2 Oct 2018 18:52:03 -0600 +Subject: net: qualcomm: rmnet: Fix incorrect allocation flag in receive path + +From: Subash Abhinov Kasiviswanathan + +[ Upstream commit ec405641e2b73160e26ef17580d0cf28565d146c ] + +The incoming skb needs to be reallocated in case the headroom +is not sufficient to adjust the ethernet header. This allocation +needs to be atomic otherwise it results in this splat + + [<600601bb>] ___might_sleep+0x185/0x1a3 + [<603f6314>] ? _raw_spin_unlock_irqrestore+0x0/0x27 + [<60069bb0>] ? __wake_up_common_lock+0x95/0xd1 + [<600602b0>] __might_sleep+0xd7/0xe2 + [<60065598>] ? enqueue_task_fair+0x112/0x209 + [<600eea13>] __kmalloc_track_caller+0x5d/0x124 + [<600ee9b6>] ? __kmalloc_track_caller+0x0/0x124 + [<602696d5>] __kmalloc_reserve.isra.34+0x30/0x7e + [<603f629b>] ? _raw_spin_lock_irqsave+0x0/0x3d + [<6026b744>] pskb_expand_head+0xbf/0x310 + [<6025ca6a>] rmnet_rx_handler+0x7e/0x16b + [<6025c9ec>] ? rmnet_rx_handler+0x0/0x16b + [<6027ad0c>] __netif_receive_skb_core+0x301/0x96f + [<60033c17>] ? set_signals+0x0/0x40 + [<6027bbcb>] __netif_receive_skb+0x24/0x8e + +Fixes: 74692caf1b0b ("net: qualcomm: rmnet: Process packets over ethernet") +Signed-off-by: Sean Tranchetti +Signed-off-by: Subash Abhinov Kasiviswanathan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c ++++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +@@ -113,7 +113,7 @@ rmnet_map_ingress_handler(struct sk_buff + struct sk_buff *skbn; + + if (skb->dev->type == ARPHRD_ETHER) { +- if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_KERNEL)) { ++ if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_ATOMIC)) { + kfree_skb(skb); + return; + } diff --git a/queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-transmit.patch b/queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-transmit.patch new file mode 100644 index 00000000000..b1d8a9f492d --- /dev/null +++ b/queue-4.18/net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-transmit.patch @@ -0,0 +1,52 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Subash Abhinov Kasiviswanathan +Date: Tue, 2 Oct 2018 18:52:02 -0600 +Subject: net: qualcomm: rmnet: Fix incorrect allocation flag in transmit + +From: Subash Abhinov Kasiviswanathan + +[ Upstream commit 6392ff3c8e4c23d0a09b0ae9f94feb3effed490b ] + +The incoming skb needs to be reallocated in case the headroom +is not sufficient to add the MAP header. 
This allocation needs to +be atomic otherwise it results in the following splat + +[32805.801456] BUG: sleeping function called from invalid context +[32805.841141] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP +[32805.904773] task: ffffffd7c5f62280 task.stack: ffffff80464a8000 +[32805.910851] pc : ___might_sleep+0x180/0x188 +[32805.915143] lr : ___might_sleep+0x180/0x188 +[32806.131520] Call trace: +[32806.134041] ___might_sleep+0x180/0x188 +[32806.137980] __might_sleep+0x50/0x84 +[32806.141653] __kmalloc_track_caller+0x80/0x3bc +[32806.146215] __kmalloc_reserve+0x3c/0x88 +[32806.150241] pskb_expand_head+0x74/0x288 +[32806.154269] rmnet_egress_handler+0xb0/0x1d8 +[32806.162239] rmnet_vnd_start_xmit+0xc8/0x13c +[32806.166627] dev_hard_start_xmit+0x148/0x280 +[32806.181181] sch_direct_xmit+0xa4/0x198 +[32806.185125] __qdisc_run+0x1f8/0x310 +[32806.188803] net_tx_action+0x23c/0x26c +[32806.192655] __do_softirq+0x220/0x408 +[32806.196420] do_softirq+0x4c/0x70 + +Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") +Signed-off-by: Subash Abhinov Kasiviswanathan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c ++++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +@@ -147,7 +147,7 @@ static int rmnet_map_egress_handler(stru + } + + if (skb_headroom(skb) < required_headroom) { +- if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL)) ++ if (pskb_expand_head(skb, required_headroom, 0, GFP_ATOMIC)) + return -ENOMEM; + } + diff --git a/queue-4.18/net-qualcomm-rmnet-skip-processing-loopback-packets.patch b/queue-4.18/net-qualcomm-rmnet-skip-processing-loopback-packets.patch new file mode 100644 index 00000000000..0b034f11756 --- /dev/null +++ b/queue-4.18/net-qualcomm-rmnet-skip-processing-loopback-packets.patch @@ -0,0 +1,34 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Sean Tranchetti +Date: Tue, 2 Oct 2018 18:52:01 -0600 +Subject: net: qualcomm: rmnet: Skip processing loopback packets + +From: Sean Tranchetti + +[ Upstream commit a07f388e2cde2be74b263f85df6f672fea0305a1 ] + +RMNET RX handler was processing invalid packets that were +originally sent on the real device and were looped back via +dev_loopback_xmit(). This was detected using syzkaller. + +Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") +Signed-off-by: Sean Tranchetti +Signed-off-by: Subash Abhinov Kasiviswanathan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c ++++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +@@ -189,6 +189,9 @@ rx_handler_result_t rmnet_rx_handler(str + if (!skb) + goto done; + ++ if (skb->pkt_type == PACKET_LOOPBACK) ++ return RX_HANDLER_PASS; ++ + dev = skb->dev; + port = rmnet_get_port(dev); + diff --git a/queue-4.18/net-sched-add-policy-validation-for-tc-attributes.patch b/queue-4.18/net-sched-add-policy-validation-for-tc-attributes.patch new file mode 100644 index 00000000000..5ae826950e9 --- /dev/null +++ b/queue-4.18/net-sched-add-policy-validation-for-tc-attributes.patch @@ -0,0 +1,87 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: David Ahern +Date: Wed, 3 Oct 2018 15:05:36 -0700 +Subject: net: sched: Add policy validation for tc attributes + +From: David Ahern + +[ Upstream commit 8b4c3cdd9dd8290343ce959a132d3b334062c5b9 ] + +A number of TC attributes are processed without proper validation +(e.g., length checks). Add a tca policy for all input attributes and use +when invoking nlmsg_parse. + +The 2 Fixes tags below cover the latest additions. The other attributes +are a string (KIND), nested attribute (OPTIONS which does seem to have +validation in most cases), for dumps only or a flag. + +Fixes: 5bc1701881e39 ("net: sched: introduce multichain support for filters") +Fixes: d47a6b0e7c492 ("net: sched: introduce ingress/egress block index attributes for qdisc") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_api.c | 24 ++++++++++++++++++++---- + 1 file changed, 20 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -1304,6 +1304,18 @@ check_loop_fn(struct Qdisc *q, unsigned + * Delete/get qdisc. 
+ */ + ++const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { ++ [TCA_KIND] = { .type = NLA_STRING }, ++ [TCA_OPTIONS] = { .type = NLA_NESTED }, ++ [TCA_RATE] = { .type = NLA_BINARY, ++ .len = sizeof(struct tc_estimator) }, ++ [TCA_STAB] = { .type = NLA_NESTED }, ++ [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, ++ [TCA_CHAIN] = { .type = NLA_U32 }, ++ [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, ++ [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, ++}; ++ + static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) + { +@@ -1320,7 +1332,8 @@ static int tc_get_qdisc(struct sk_buff * + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + +- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); ++ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, ++ extack); + if (err < 0) + return err; + +@@ -1404,7 +1417,8 @@ static int tc_modify_qdisc(struct sk_buf + + replay: + /* Reinit, just in case something touches this. */ +- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); ++ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, ++ extack); + if (err < 0) + return err; + +@@ -1638,7 +1652,8 @@ static int tc_dump_qdisc(struct sk_buff + idx = 0; + ASSERT_RTNL(); + +- err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL); ++ err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, ++ rtm_tca_policy, NULL); + if (err < 0) + return err; + +@@ -1857,7 +1872,8 @@ static int tc_ctl_tclass(struct sk_buff + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + +- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); ++ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, ++ extack); + if (err < 0) + return err; + diff --git a/queue-4.18/net-sched-cls_u32-fix-hnode-refcounting.patch b/queue-4.18/net-sched-cls_u32-fix-hnode-refcounting.patch new file mode 100644 index 00000000000..fd249730a24 --- /dev/null +++ 
b/queue-4.18/net-sched-cls_u32-fix-hnode-refcounting.patch @@ -0,0 +1,107 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Al Viro +Date: Sun, 7 Oct 2018 07:40:17 -0400 +Subject: net: sched: cls_u32: fix hnode refcounting + +From: Al Viro + +[ Upstream commit 6d4c407744dd0338da5d5d76f40dce5adabfb30a ] + +cls_u32.c misuses refcounts for struct tc_u_hnode - it counts references +via ->hlist and via ->tp_root together. u32_destroy() drops the former +and, in case when there had been links, leaves the sucker on the list. +As the result, there's nothing to protect it from getting freed once links +are dropped. +That also makes the "is it busy" check incapable of catching the root +hnode - it *is* busy (there's a reference from tp), but we don't see it as +something separate. "Is it our root?" check partially covers that, but +the problem exists for others' roots as well. + +AFAICS, the minimal fix preserving the existing behaviour (where it doesn't +include oopsen, that is) would be this: + * count tp->root and tp_c->hlist as separate references. I.e. +have u32_init() set refcount to 2, not 1. + * in u32_destroy() we always drop the former; +in u32_destroy_hnode() - the latter. + + That way we have *all* references contributing to refcount. List +removal happens in u32_destroy_hnode() (called only when ->refcnt is 1) +and in u32_destroy() in case of tc_u_common going away, along with +everything reachable from it. IOW, that way we know that +u32_destroy_key() won't free something still on the list (or pointed to by +someone's ->root).
+ +Reproducer: + +tc qdisc add dev eth0 ingress +tc filter add dev eth0 parent ffff: protocol ip prio 100 handle 1: \ +u32 divisor 1 +tc filter add dev eth0 parent ffff: protocol ip prio 200 handle 2: \ +u32 divisor 1 +tc filter add dev eth0 parent ffff: protocol ip prio 100 \ +handle 1:0:11 u32 ht 1: link 801: offset at 0 mask 0f00 shift 6 \ +plus 0 eat match ip protocol 6 ff +tc filter delete dev eth0 parent ffff: protocol ip prio 200 +tc filter change dev eth0 parent ffff: protocol ip prio 100 \ +handle 1:0:11 u32 ht 1: link 0: offset at 0 mask 0f00 shift 6 plus 0 \ +eat match ip protocol 6 ff +tc filter delete dev eth0 parent ffff: protocol ip prio 100 + +Signed-off-by: Al Viro +Signed-off-by: Jamal Hadi Salim +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_u32.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -397,6 +397,7 @@ static int u32_init(struct tcf_proto *tp + rcu_assign_pointer(tp_c->hlist, root_ht); + root_ht->tp_c = tp_c; + ++ root_ht->refcnt++; + rcu_assign_pointer(tp->root, root_ht); + tp->data = tp_c; + return 0; +@@ -608,7 +609,7 @@ static int u32_destroy_hnode(struct tcf_ + struct tc_u_hnode __rcu **hn; + struct tc_u_hnode *phn; + +- WARN_ON(ht->refcnt); ++ WARN_ON(--ht->refcnt); + + u32_clear_hnode(tp, ht, extack); + +@@ -647,7 +648,7 @@ static void u32_destroy(struct tcf_proto + + WARN_ON(root_ht == NULL); + +- if (root_ht && --root_ht->refcnt == 0) ++ if (root_ht && --root_ht->refcnt == 1) + u32_destroy_hnode(tp, root_ht, extack); + + if (--tp_c->refcnt == 0) { +@@ -696,7 +697,6 @@ static int u32_delete(struct tcf_proto * + } + + if (ht->refcnt == 1) { +- ht->refcnt--; + u32_destroy_hnode(tp, ht, extack); + } else { + NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); +@@ -706,11 +706,11 @@ static int u32_delete(struct tcf_proto * + out: + *last = true; + if (root_ht) { +- if (root_ht->refcnt > 1) { ++ if 
(root_ht->refcnt > 2) { + *last = false; + goto ret; + } +- if (root_ht->refcnt == 1) { ++ if (root_ht->refcnt == 2) { + if (!ht_empty(root_ht)) { + *last = false; + goto ret; diff --git a/queue-4.18/net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch b/queue-4.18/net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch new file mode 100644 index 00000000000..c2772297786 --- /dev/null +++ b/queue-4.18/net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch @@ -0,0 +1,62 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Jose Abreu +Date: Mon, 17 Sep 2018 09:22:57 +0100 +Subject: net: stmmac: Fixup the tail addr setting in xmit path + +From: Jose Abreu + +[ Upstream commit 0431100b3d82c509729ece1ab22ada2484e209c1 ] + +Currently we are always setting the tail address of descriptor list to +the end of the pre-allocated list. + +According to databook this is not correct. Tail address should point to +the last available descriptor + 1, which means we have to update the +tail address every time we call the xmit function. + +This should make no impact in older versions of MAC but in newer +versions there are some DMA features which allow the IP to fetch +descriptors in advance and in a non sequential order so it's critical +that we set the tail address correctly. + +Signed-off-by: Jose Abreu +Fixes: f748be531d70 ("stmmac: support new GMAC4") +Cc: David S. Miller +Cc: Joao Pinto +Cc: Giuseppe Cavallaro +Cc: Alexandre Torgue +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -2193,8 +2193,7 @@ static int stmmac_init_dma_engine(struct + stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, + tx_q->dma_tx_phy, chan); + +- tx_q->tx_tail_addr = tx_q->dma_tx_phy + +- (DMA_TX_SIZE * sizeof(struct dma_desc)); ++ tx_q->tx_tail_addr = tx_q->dma_tx_phy; + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, + tx_q->tx_tail_addr, chan); + } +@@ -2971,6 +2970,7 @@ static netdev_tx_t stmmac_tso_xmit(struc + + netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); + ++ tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); + + return NETDEV_TX_OK; +@@ -3178,6 +3178,8 @@ static netdev_tx_t stmmac_xmit(struct sk + netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); + + stmmac_enable_dma_transmission(priv, priv->ioaddr); ++ ++ tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); + + return NETDEV_TX_OK; diff --git a/queue-4.18/net-stmmac-rework-coalesce-timer-and-fix-multi-queue-races.patch b/queue-4.18/net-stmmac-rework-coalesce-timer-and-fix-multi-queue-races.patch new file mode 100644 index 00000000000..c3b7b1fd5df --- /dev/null +++ b/queue-4.18/net-stmmac-rework-coalesce-timer-and-fix-multi-queue-races.patch @@ -0,0 +1,540 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Jose Abreu +Date: Mon, 17 Sep 2018 09:22:56 +0100 +Subject: net: stmmac: Rework coalesce timer and fix multi-queue races + +From: Jose Abreu + +[ Upstream commit 8fce3331702316d4bcfeb0771c09ac75d2192bbc ] + +This follows David Miller advice and tries to fix coalesce timer in +multi-queue scenarios. 
+ +We are now using per-queue coalesce values and per-queue TX timer. + +Coalesce timer default values was changed to 1ms and the coalesce frames +to 25. + +Tested in B2B setup between XGMAC2 and GMAC5. + +Signed-off-by: Jose Abreu +Fixes: ce736788e8a ("net: stmmac: adding multiple buffers for TX") +Cc: Florian Fainelli +Cc: Neil Armstrong +Cc: Jerome Brunet +Cc: Martin Blumenstingl +Cc: David S. Miller +Cc: Joao Pinto +Cc: Giuseppe Cavallaro +Cc: Alexandre Torgue +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/common.h | 4 + drivers/net/ethernet/stmicro/stmmac/stmmac.h | 14 + + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 234 ++++++++++++---------- + include/linux/stmmac.h | 1 + 4 files changed, 146 insertions(+), 107 deletions(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/common.h ++++ b/drivers/net/ethernet/stmicro/stmmac/common.h +@@ -256,10 +256,10 @@ struct stmmac_safety_stats { + #define MAX_DMA_RIWT 0xff + #define MIN_DMA_RIWT 0x20 + /* Tx coalesce parameters */ +-#define STMMAC_COAL_TX_TIMER 40000 ++#define STMMAC_COAL_TX_TIMER 1000 + #define STMMAC_MAX_COAL_TX_TICK 100000 + #define STMMAC_TX_MAX_FRAMES 256 +-#define STMMAC_TX_FRAMES 64 ++#define STMMAC_TX_FRAMES 25 + + /* Packets types */ + enum packets_types { +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h +@@ -48,6 +48,8 @@ struct stmmac_tx_info { + + /* Frequently used values are kept adjacent for cache effect */ + struct stmmac_tx_queue { ++ u32 tx_count_frames; ++ struct timer_list txtimer; + u32 queue_index; + struct stmmac_priv *priv_data; + struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp; +@@ -73,7 +75,14 @@ struct stmmac_rx_queue { + u32 rx_zeroc_thresh; + dma_addr_t dma_rx_phy; + u32 rx_tail_addr; ++}; ++ ++struct stmmac_channel { + struct napi_struct napi ____cacheline_aligned_in_smp; ++ struct stmmac_priv *priv_data; ++ u32 index; ++ int has_rx; ++ int 
has_tx; + }; + + struct stmmac_tc_entry { +@@ -109,14 +118,12 @@ struct stmmac_pps_cfg { + + struct stmmac_priv { + /* Frequently used values are kept adjacent for cache effect */ +- u32 tx_count_frames; + u32 tx_coal_frames; + u32 tx_coal_timer; + + int tx_coalesce; + int hwts_tx_en; + bool tx_path_in_lpi_mode; +- struct timer_list txtimer; + bool tso; + + unsigned int dma_buf_sz; +@@ -137,6 +144,9 @@ struct stmmac_priv { + /* TX Queue */ + struct stmmac_tx_queue tx_queue[MTL_MAX_TX_QUEUES]; + ++ /* Generic channel for NAPI */ ++ struct stmmac_channel channel[STMMAC_CH_MAX]; ++ + bool oldlink; + int speed; + int oldduplex; +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -147,12 +147,14 @@ static void stmmac_verify_args(void) + static void stmmac_disable_all_queues(struct stmmac_priv *priv) + { + u32 rx_queues_cnt = priv->plat->rx_queues_to_use; ++ u32 tx_queues_cnt = priv->plat->tx_queues_to_use; ++ u32 maxq = max(rx_queues_cnt, tx_queues_cnt); + u32 queue; + +- for (queue = 0; queue < rx_queues_cnt; queue++) { +- struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; ++ for (queue = 0; queue < maxq; queue++) { ++ struct stmmac_channel *ch = &priv->channel[queue]; + +- napi_disable(&rx_q->napi); ++ napi_disable(&ch->napi); + } + } + +@@ -163,12 +165,14 @@ static void stmmac_disable_all_queues(st + static void stmmac_enable_all_queues(struct stmmac_priv *priv) + { + u32 rx_queues_cnt = priv->plat->rx_queues_to_use; ++ u32 tx_queues_cnt = priv->plat->tx_queues_to_use; ++ u32 maxq = max(rx_queues_cnt, tx_queues_cnt); + u32 queue; + +- for (queue = 0; queue < rx_queues_cnt; queue++) { +- struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; ++ for (queue = 0; queue < maxq; queue++) { ++ struct stmmac_channel *ch = &priv->channel[queue]; + +- napi_enable(&rx_q->napi); ++ napi_enable(&ch->napi); + } + } + +@@ -1822,18 +1826,18 @@ static void stmmac_dma_operation_mode(st + * @queue: TX queue index + * 
Description: it reclaims the transmit resources after transmission completes. + */ +-static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) ++static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) + { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + unsigned int bytes_compl = 0, pkts_compl = 0; +- unsigned int entry; ++ unsigned int entry, count = 0; + +- netif_tx_lock(priv->dev); ++ __netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue)); + + priv->xstats.tx_clean++; + + entry = tx_q->dirty_tx; +- while (entry != tx_q->cur_tx) { ++ while ((entry != tx_q->cur_tx) && (count < budget)) { + struct sk_buff *skb = tx_q->tx_skbuff[entry]; + struct dma_desc *p; + int status; +@@ -1849,6 +1853,8 @@ static void stmmac_tx_clean(struct stmma + if (unlikely(status & tx_dma_own)) + break; + ++ count++; ++ + /* Make sure descriptor fields are read after reading + * the own bit. + */ +@@ -1916,7 +1922,10 @@ static void stmmac_tx_clean(struct stmma + stmmac_enable_eee_mode(priv); + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); + } +- netif_tx_unlock(priv->dev); ++ ++ __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); ++ ++ return count; + } + + /** +@@ -1999,6 +2008,33 @@ static bool stmmac_safety_feat_interrupt + return false; + } + ++static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) ++{ ++ int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, ++ &priv->xstats, chan); ++ struct stmmac_channel *ch = &priv->channel[chan]; ++ bool needs_work = false; ++ ++ if ((status & handle_rx) && ch->has_rx) { ++ needs_work = true; ++ } else { ++ status &= ~handle_rx; ++ } ++ ++ if ((status & handle_tx) && ch->has_tx) { ++ needs_work = true; ++ } else { ++ status &= ~handle_tx; ++ } ++ ++ if (needs_work && napi_schedule_prep(&ch->napi)) { ++ stmmac_disable_dma_irq(priv, priv->ioaddr, chan); ++ __napi_schedule(&ch->napi); ++ } ++ ++ return status; ++} ++ + /** + * stmmac_dma_interrupt - DMA ISR + * @priv: 
driver private structure +@@ -2013,57 +2049,14 @@ static void stmmac_dma_interrupt(struct + u32 channels_to_check = tx_channel_count > rx_channel_count ? + tx_channel_count : rx_channel_count; + u32 chan; +- bool poll_scheduled = false; + int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)]; + + /* Make sure we never check beyond our status buffer. */ + if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status))) + channels_to_check = ARRAY_SIZE(status); + +- /* Each DMA channel can be used for rx and tx simultaneously, yet +- * napi_struct is embedded in struct stmmac_rx_queue rather than in a +- * stmmac_channel struct. +- * Because of this, stmmac_poll currently checks (and possibly wakes) +- * all tx queues rather than just a single tx queue. +- */ + for (chan = 0; chan < channels_to_check; chan++) +- status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr, +- &priv->xstats, chan); +- +- for (chan = 0; chan < rx_channel_count; chan++) { +- if (likely(status[chan] & handle_rx)) { +- struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan]; +- +- if (likely(napi_schedule_prep(&rx_q->napi))) { +- stmmac_disable_dma_irq(priv, priv->ioaddr, chan); +- __napi_schedule(&rx_q->napi); +- poll_scheduled = true; +- } +- } +- } +- +- /* If we scheduled poll, we already know that tx queues will be checked. +- * If we didn't schedule poll, see if any DMA channel (used by tx) has a +- * completed transmission, if so, call stmmac_poll (once). +- */ +- if (!poll_scheduled) { +- for (chan = 0; chan < tx_channel_count; chan++) { +- if (status[chan] & handle_tx) { +- /* It doesn't matter what rx queue we choose +- * here. We use 0 since it always exists. 
+- */ +- struct stmmac_rx_queue *rx_q = +- &priv->rx_queue[0]; +- +- if (likely(napi_schedule_prep(&rx_q->napi))) { +- stmmac_disable_dma_irq(priv, +- priv->ioaddr, chan); +- __napi_schedule(&rx_q->napi); +- } +- break; +- } +- } +- } ++ status[chan] = stmmac_napi_check(priv, chan); + + for (chan = 0; chan < tx_channel_count; chan++) { + if (unlikely(status[chan] & tx_hard_error_bump_tc)) { +@@ -2211,6 +2204,13 @@ static int stmmac_init_dma_engine(struct + return ret; + } + ++static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue) ++{ ++ struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; ++ ++ mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer)); ++} ++ + /** + * stmmac_tx_timer - mitigation sw timer for tx. + * @data: data pointer +@@ -2219,13 +2219,14 @@ static int stmmac_init_dma_engine(struct + */ + static void stmmac_tx_timer(struct timer_list *t) + { +- struct stmmac_priv *priv = from_timer(priv, t, txtimer); +- u32 tx_queues_count = priv->plat->tx_queues_to_use; +- u32 queue; ++ struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer); ++ struct stmmac_priv *priv = tx_q->priv_data; ++ struct stmmac_channel *ch; + +- /* let's scan all the tx queues */ +- for (queue = 0; queue < tx_queues_count; queue++) +- stmmac_tx_clean(priv, queue); ++ ch = &priv->channel[tx_q->queue_index]; ++ ++ if (likely(napi_schedule_prep(&ch->napi))) ++ __napi_schedule(&ch->napi); + } + + /** +@@ -2238,11 +2239,17 @@ static void stmmac_tx_timer(struct timer + */ + static void stmmac_init_tx_coalesce(struct stmmac_priv *priv) + { ++ u32 tx_channel_count = priv->plat->tx_queues_to_use; ++ u32 chan; ++ + priv->tx_coal_frames = STMMAC_TX_FRAMES; + priv->tx_coal_timer = STMMAC_COAL_TX_TIMER; +- timer_setup(&priv->txtimer, stmmac_tx_timer, 0); +- priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer); +- add_timer(&priv->txtimer); ++ ++ for (chan = 0; chan < tx_channel_count; chan++) { ++ struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; ++ ++ 
timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0); ++ } + } + + static void stmmac_set_rings_length(struct stmmac_priv *priv) +@@ -2570,6 +2577,7 @@ static void stmmac_hw_teardown(struct ne + static int stmmac_open(struct net_device *dev) + { + struct stmmac_priv *priv = netdev_priv(dev); ++ u32 chan; + int ret; + + stmmac_check_ether_addr(priv); +@@ -2666,7 +2674,9 @@ irq_error: + if (dev->phydev) + phy_stop(dev->phydev); + +- del_timer_sync(&priv->txtimer); ++ for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) ++ del_timer_sync(&priv->tx_queue[chan].txtimer); ++ + stmmac_hw_teardown(dev); + init_error: + free_dma_desc_resources(priv); +@@ -2686,6 +2696,7 @@ dma_desc_error: + static int stmmac_release(struct net_device *dev) + { + struct stmmac_priv *priv = netdev_priv(dev); ++ u32 chan; + + if (priv->eee_enabled) + del_timer_sync(&priv->eee_ctrl_timer); +@@ -2700,7 +2711,8 @@ static int stmmac_release(struct net_dev + + stmmac_disable_all_queues(priv); + +- del_timer_sync(&priv->txtimer); ++ for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) ++ del_timer_sync(&priv->tx_queue[chan].txtimer); + + /* Free the IRQ lines */ + free_irq(dev->irq, dev); +@@ -2914,14 +2926,13 @@ static netdev_tx_t stmmac_tso_xmit(struc + priv->xstats.tx_tso_nfrags += nfrags; + + /* Manage tx mitigation */ +- priv->tx_count_frames += nfrags + 1; +- if (likely(priv->tx_coal_frames > priv->tx_count_frames)) { +- mod_timer(&priv->txtimer, +- STMMAC_COAL_TIMER(priv->tx_coal_timer)); +- } else { +- priv->tx_count_frames = 0; ++ tx_q->tx_count_frames += nfrags + 1; ++ if (priv->tx_coal_frames <= tx_q->tx_count_frames) { + stmmac_set_tx_ic(priv, desc); + priv->xstats.tx_set_ic_bit++; ++ tx_q->tx_count_frames = 0; ++ } else { ++ stmmac_tx_timer_arm(priv, queue); + } + + skb_tx_timestamp(skb); +@@ -3125,14 +3136,13 @@ static netdev_tx_t stmmac_xmit(struct sk + * This approach takes care about the fragments: desc is the first + * element in case of no SG. 
+ */ +- priv->tx_count_frames += nfrags + 1; +- if (likely(priv->tx_coal_frames > priv->tx_count_frames)) { +- mod_timer(&priv->txtimer, +- STMMAC_COAL_TIMER(priv->tx_coal_timer)); +- } else { +- priv->tx_count_frames = 0; ++ tx_q->tx_count_frames += nfrags + 1; ++ if (priv->tx_coal_frames <= tx_q->tx_count_frames) { + stmmac_set_tx_ic(priv, desc); + priv->xstats.tx_set_ic_bit++; ++ tx_q->tx_count_frames = 0; ++ } else { ++ stmmac_tx_timer_arm(priv, queue); + } + + skb_tx_timestamp(skb); +@@ -3300,6 +3310,7 @@ static inline void stmmac_rx_refill(stru + static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) + { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; ++ struct stmmac_channel *ch = &priv->channel[queue]; + unsigned int entry = rx_q->cur_rx; + int coe = priv->hw->rx_csum; + unsigned int next_entry; +@@ -3469,7 +3480,7 @@ static int stmmac_rx(struct stmmac_priv + else + skb->ip_summed = CHECKSUM_UNNECESSARY; + +- napi_gro_receive(&rx_q->napi, skb); ++ napi_gro_receive(&ch->napi, skb); + + priv->dev->stats.rx_packets++; + priv->dev->stats.rx_bytes += frame_len; +@@ -3492,27 +3503,33 @@ static int stmmac_rx(struct stmmac_priv + * Description : + * To look at the incoming frames and clear the tx resources. 
+ */ +-static int stmmac_poll(struct napi_struct *napi, int budget) ++static int stmmac_napi_poll(struct napi_struct *napi, int budget) + { +- struct stmmac_rx_queue *rx_q = +- container_of(napi, struct stmmac_rx_queue, napi); +- struct stmmac_priv *priv = rx_q->priv_data; +- u32 tx_count = priv->plat->tx_queues_to_use; +- u32 chan = rx_q->queue_index; +- int work_done = 0; +- u32 queue; ++ struct stmmac_channel *ch = ++ container_of(napi, struct stmmac_channel, napi); ++ struct stmmac_priv *priv = ch->priv_data; ++ int work_done = 0, work_rem = budget; ++ u32 chan = ch->index; + + priv->xstats.napi_poll++; + +- /* check all the queues */ +- for (queue = 0; queue < tx_count; queue++) +- stmmac_tx_clean(priv, queue); +- +- work_done = stmmac_rx(priv, budget, rx_q->queue_index); +- if (work_done < budget) { +- napi_complete_done(napi, work_done); +- stmmac_enable_dma_irq(priv, priv->ioaddr, chan); ++ if (ch->has_tx) { ++ int done = stmmac_tx_clean(priv, work_rem, chan); ++ ++ work_done += done; ++ work_rem -= done; ++ } ++ ++ if (ch->has_rx) { ++ int done = stmmac_rx(priv, work_rem, chan); ++ ++ work_done += done; ++ work_rem -= done; + } ++ ++ if (work_done < budget && napi_complete_done(napi, work_done)) ++ stmmac_enable_dma_irq(priv, priv->ioaddr, chan); ++ + return work_done; + } + +@@ -4172,8 +4189,8 @@ int stmmac_dvr_probe(struct device *devi + { + struct net_device *ndev = NULL; + struct stmmac_priv *priv; ++ u32 queue, maxq; + int ret = 0; +- u32 queue; + + ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv), + MTL_MAX_TX_QUEUES, +@@ -4293,11 +4310,22 @@ int stmmac_dvr_probe(struct device *devi + "Enable RX Mitigation via HW Watchdog Timer\n"); + } + +- for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) { +- struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; ++ /* Setup channels NAPI */ ++ maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use); + +- netif_napi_add(ndev, &rx_q->napi, stmmac_poll, +- (8 * 
priv->plat->rx_queues_to_use)); ++ for (queue = 0; queue < maxq; queue++) { ++ struct stmmac_channel *ch = &priv->channel[queue]; ++ ++ ch->priv_data = priv; ++ ch->index = queue; ++ ++ if (queue < priv->plat->rx_queues_to_use) ++ ch->has_rx = true; ++ if (queue < priv->plat->tx_queues_to_use) ++ ch->has_tx = true; ++ ++ netif_napi_add(ndev, &ch->napi, stmmac_napi_poll, ++ NAPI_POLL_WEIGHT); + } + + mutex_init(&priv->lock); +@@ -4343,10 +4371,10 @@ error_netdev_register: + priv->hw->pcs != STMMAC_PCS_RTBI) + stmmac_mdio_unregister(ndev); + error_mdio_register: +- for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) { +- struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; ++ for (queue = 0; queue < maxq; queue++) { ++ struct stmmac_channel *ch = &priv->channel[queue]; + +- netif_napi_del(&rx_q->napi); ++ netif_napi_del(&ch->napi); + } + error_hw_init: + destroy_workqueue(priv->wq); +--- a/include/linux/stmmac.h ++++ b/include/linux/stmmac.h +@@ -30,6 +30,7 @@ + + #define MTL_MAX_RX_QUEUES 8 + #define MTL_MAX_TX_QUEUES 8 ++#define STMMAC_CH_MAX 8 + + #define STMMAC_RX_COE_NONE 0 + #define STMMAC_RX_COE_TYPE1 1 diff --git a/queue-4.18/net-systemport-fix-wake-up-interrupt-race-during-resume.patch b/queue-4.18/net-systemport-fix-wake-up-interrupt-race-during-resume.patch new file mode 100644 index 00000000000..c7cfcb22e2a --- /dev/null +++ b/queue-4.18/net-systemport-fix-wake-up-interrupt-race-during-resume.patch @@ -0,0 +1,90 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Florian Fainelli +Date: Tue, 2 Oct 2018 16:52:03 -0700 +Subject: net: systemport: Fix wake-up interrupt race during resume + +From: Florian Fainelli + +[ Upstream commit 45ec318578c0c22a11f5b9927d064418e1ab1905 ] + +The AON_PM_L2 is normally used to trigger and identify the source of a +wake-up event. 
Since the RX_SYS clock is no longer turned off, we also +have an interrupt being sent to the SYSTEMPORT INTRL_2_0 controller, and +that interrupt remains active up until the magic packet detector is +disabled which happens much later during the driver resumption. + +The race happens if we have a CPU that is entering the SYSTEMPORT +INTRL2_0 handler during resume, and another CPU has managed to clear the +wake-up interrupt during bcm_sysport_resume_from_wol(). In that case, we +have the first CPU stuck in the interrupt handler with an interrupt +cause that has been cleared under its feet, and so we keep returning +IRQ_NONE and we never make any progress. + +This was not a problem before because we would always turn off the +RX_SYS clock during WoL, so the SYSTEMPORT INTRL2_0 would also be turned +off as well, thus not latching the interrupt. + +The fix is to make sure we do not enable either the MPD or +BRCM_TAG_MATCH interrupts since those are redundant with what the +AON_PM_L2 interrupt controller already processes and they would cause +such a race to occur. + +Fixes: bb9051a2b230 ("net: systemport: Add support for WAKE_FILTER") +Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bcmsysport.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bcmsysport.c ++++ b/drivers/net/ethernet/broadcom/bcmsysport.c +@@ -1045,14 +1045,22 @@ static void bcm_sysport_resume_from_wol( + { + u32 reg; + +- /* Stop monitoring MPD interrupt */ +- intrl2_0_mask_set(priv, INTRL2_0_MPD); +- + /* Clear the MagicPacket detection logic */ + reg = umac_readl(priv, UMAC_MPD_CTRL); + reg &= ~MPD_EN; + umac_writel(priv, reg, UMAC_MPD_CTRL); + ++ reg = intrl2_0_readl(priv, INTRL2_CPU_STATUS); ++ if (reg & INTRL2_0_MPD) ++ netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); ++ ++ if (reg & INTRL2_0_BRCM_MATCH_TAG) { ++ reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & ++ RXCHK_BRCM_TAG_MATCH_MASK; ++ netdev_info(priv->netdev, ++ "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); ++ } ++ + netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n"); + } + +@@ -1102,11 +1110,6 @@ static irqreturn_t bcm_sysport_rx_isr(in + if (priv->irq0_stat & INTRL2_0_TX_RING_FULL) + bcm_sysport_tx_reclaim_all(priv); + +- if (priv->irq0_stat & INTRL2_0_MPD) { +- netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n"); +- bcm_sysport_resume_from_wol(priv); +- } +- + if (!priv->is_lite) + goto out; + +@@ -2459,9 +2462,6 @@ static int bcm_sysport_suspend_to_wol(st + /* UniMAC receive needs to be turned on */ + umac_enable_set(priv, CMD_RX_EN, 1); + +- /* Enable the interrupt wake-up source */ +- intrl2_0_mask_clear(priv, INTRL2_0_MPD); +- + netif_dbg(priv, wol, ndev, "entered WOL mode\n"); + + return 0; diff --git a/queue-4.18/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch b/queue-4.18/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch new file mode 100644 index 00000000000..0124cd62dfd --- /dev/null +++ b/queue-4.18/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Oct 16 
07:06:17 CEST 2018 +From: Yu Zhao +Date: Fri, 28 Sep 2018 17:04:30 -0600 +Subject: net/usb: cancel pending work when unbinding smsc75xx + +From: Yu Zhao + +[ Upstream commit f7b2a56e1f3dcbdb4cf09b2b63e859ffe0e09df8 ] + +Cancel pending work before freeing smsc75xx private data structure +during unbinding. This fixes the following crash in the driver: + +BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 +IP: mutex_lock+0x2b/0x3f + +Workqueue: events smsc75xx_deferred_multicast_write [smsc75xx] +task: ffff8caa83e85700 task.stack: ffff948b80518000 +RIP: 0010:mutex_lock+0x2b/0x3f + +Call Trace: + smsc75xx_deferred_multicast_write+0x40/0x1af [smsc75xx] + process_one_work+0x18d/0x2fc + worker_thread+0x1a2/0x269 + ? pr_cont_work+0x58/0x58 + kthread+0xfa/0x10a + ? pr_cont_work+0x58/0x58 + ? rcu_read_unlock_sched_notrace+0x48/0x48 + ret_from_fork+0x22/0x40 + +Signed-off-by: Yu Zhao +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/smsc75xx.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/smsc75xx.c ++++ b/drivers/net/usb/smsc75xx.c +@@ -1517,6 +1517,7 @@ static void smsc75xx_unbind(struct usbne + { + struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); + if (pdata) { ++ cancel_work_sync(&pdata->set_multicast); + netif_dbg(dev, ifdown, dev->net, "free pdata\n"); + kfree(pdata); + pdata = NULL; diff --git a/queue-4.18/netlabel-check-for-ipv4mask-in-addrinfo_get.patch b/queue-4.18/netlabel-check-for-ipv4mask-in-addrinfo_get.patch new file mode 100644 index 00000000000..9b76f511fce --- /dev/null +++ b/queue-4.18/netlabel-check-for-ipv4mask-in-addrinfo_get.patch @@ -0,0 +1,61 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Sean Tranchetti +Date: Thu, 20 Sep 2018 14:29:45 -0600 +Subject: netlabel: check for IPV4MASK in addrinfo_get + +From: Sean Tranchetti + +[ Upstream commit f88b4c01b97e09535505cf3c327fdbce55c27f00 ] + +netlbl_unlabel_addrinfo_get() assumes that if it finds the 
+NLBL_UNLABEL_A_IPV4ADDR attribute, it must also have the +NLBL_UNLABEL_A_IPV4MASK attribute as well. However, this is +not necessarily the case as the current checks in +netlbl_unlabel_staticadd() and friends are not sufficent to +enforce this. + +If passed a netlink message with NLBL_UNLABEL_A_IPV4ADDR, +NLBL_UNLABEL_A_IPV6ADDR, and NLBL_UNLABEL_A_IPV6MASK attributes, +these functions will all call netlbl_unlabel_addrinfo_get() which +will then attempt dereference NULL when fetching the non-existent +NLBL_UNLABEL_A_IPV4MASK attribute: + +Unable to handle kernel NULL pointer dereference at virtual address 0 +Process unlab (pid: 31762, stack limit = 0xffffff80502d8000) +Call trace: + netlbl_unlabel_addrinfo_get+0x44/0xd8 + netlbl_unlabel_staticremovedef+0x98/0xe0 + genl_rcv_msg+0x354/0x388 + netlink_rcv_skb+0xac/0x118 + genl_rcv+0x34/0x48 + netlink_unicast+0x158/0x1f0 + netlink_sendmsg+0x32c/0x338 + sock_sendmsg+0x44/0x60 + ___sys_sendmsg+0x1d0/0x2a8 + __sys_sendmsg+0x64/0xb4 + SyS_sendmsg+0x34/0x4c + el0_svc_naked+0x34/0x38 +Code: 51001149 7100113f 540000a0 f9401508 (79400108) +---[ end trace f6438a488e737143 ]--- +Kernel panic - not syncing: Fatal exception + +Signed-off-by: Sean Tranchetti + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlabel/netlabel_unlabeled.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/netlabel/netlabel_unlabeled.c ++++ b/net/netlabel/netlabel_unlabeled.c +@@ -781,7 +781,8 @@ static int netlbl_unlabel_addrinfo_get(s + { + u32 addr_len; + +- if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) { ++ if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && ++ info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { + addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); + if (addr_len != sizeof(struct in_addr) && + addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) diff --git a/queue-4.18/nfp-avoid-soft-lockups-under-control-message-storm.patch b/queue-4.18/nfp-avoid-soft-lockups-under-control-message-storm.patch new file mode 100644 index 00000000000..3ea5bb7682f --- /dev/null +++ b/queue-4.18/nfp-avoid-soft-lockups-under-control-message-storm.patch @@ -0,0 +1,63 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Jakub Kicinski +Date: Tue, 2 Oct 2018 10:10:14 -0700 +Subject: nfp: avoid soft lockups under control message storm + +From: Jakub Kicinski + +[ Upstream commit ff58e2df62ce29d0552278c290ae494b30fe0c6f ] + +When FW floods the driver with control messages try to exit the cmsg +processing loop every now and then to avoid soft lockups. Cmsg +processing is generally very lightweight so 512 seems like a reasonable +budget, which should not be exceeded under normal conditions. + +Fixes: 77ece8d5f196 ("nfp: add control vNIC datapath") +Signed-off-by: Jakub Kicinski +Reviewed-by: Simon Horman +Tested-by: Pieter Jansen van Vuuren +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +@@ -2068,14 +2068,17 @@ nfp_ctrl_rx_one(struct nfp_net *nn, stru + return true; + } + +-static void nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) ++static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) + { + struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; + struct nfp_net *nn = r_vec->nfp_net; + struct nfp_net_dp *dp = &nn->dp; ++ unsigned int budget = 512; + +- while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring)) ++ while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) + continue; ++ ++ return budget; + } + + static void nfp_ctrl_poll(unsigned long arg) +@@ -2087,9 +2090,13 @@ static void nfp_ctrl_poll(unsigned long + __nfp_ctrl_tx_queued(r_vec); + spin_unlock_bh(&r_vec->lock); + +- nfp_ctrl_rx(r_vec); +- +- nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); ++ if (nfp_ctrl_rx(r_vec)) { ++ nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); ++ } else { ++ tasklet_schedule(&r_vec->tasklet); ++ nn_dp_warn(&r_vec->nfp_net->dp, ++ "control message budget exceeded!\n"); ++ } + } + + /* Setup and Configuration diff --git a/queue-4.18/qed-fix-shmem-structure-inconsistency-between-driver-and-the-mfw.patch b/queue-4.18/qed-fix-shmem-structure-inconsistency-between-driver-and-the-mfw.patch new file mode 100644 index 00000000000..6a721d4e9ed --- /dev/null +++ b/queue-4.18/qed-fix-shmem-structure-inconsistency-between-driver-and-the-mfw.patch @@ -0,0 +1,34 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Sudarsana Reddy Kalluru +Date: Thu, 27 Sep 2018 04:12:10 -0700 +Subject: qed: Fix shmem structure inconsistency between driver and the mfw. 
+ +From: Sudarsana Reddy Kalluru + +[ Upstream commit 5f672090e44f4951084c5e1d6b0668a5fc422af8 ] + +The structure shared between driver and the management FW (mfw) differs in +size. This would lead to issues when the driver tries to access the structure +members which are not-aligned with the mfw copy e.g., data_ptr usage in the +case of mfw_tlv request. +Align the driver structure with mfw copy, add reserved field(s) to driver +structure for the members not used by the driver. + +Fixes: dd006921d67f ("qed: Add MFW interfaces for TLV request support.") +Signed-off-by: Sudarsana Reddy Kalluru +Signed-off-by: Michal Kalderon +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h ++++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h +@@ -11987,6 +11987,7 @@ struct public_global { + u32 running_bundle_id; + s32 external_temperature; + u32 mdump_reason; ++ u64 reserved; + u32 data_ptr; + u32 data_size; + }; diff --git a/queue-4.18/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch b/queue-4.18/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch new file mode 100644 index 00000000000..1a09552b86a --- /dev/null +++ b/queue-4.18/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch @@ -0,0 +1,149 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Shahed Shaikh +Date: Wed, 26 Sep 2018 12:41:10 -0700 +Subject: qlcnic: fix Tx descriptor corruption on 82xx devices + +From: Shahed Shaikh + +[ Upstream commit c333fa0c4f220f8f7ea5acd6b0ebf3bf13fd684d ] + +In regular NIC transmission flow, driver always configures MAC using +Tx queue zero descriptor as a part of MAC learning flow. 
+But with multi Tx queue supported NIC, regular transmission can occur on +any non-zero Tx queue and from that context it uses +Tx queue zero descriptor to configure MAC, at the same time TX queue +zero could be used by another CPU for regular transmission +which could lead to Tx queue zero descriptor corruption and cause FW +abort. + +This patch fixes this in such a way that driver always configures +learned MAC address from the same Tx queue which is used for +regular transmission. + +Fixes: 7e2cf4feba05 ("qlcnic: change driver hardware interface mechanism") +Signed-off-by: Shahed Shaikh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 8 +++++--- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 3 ++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h | 3 ++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h | 3 ++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 12 ++++++------ + 5 files changed, 17 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +@@ -1800,7 +1800,8 @@ struct qlcnic_hardware_ops { + int (*config_loopback) (struct qlcnic_adapter *, u8); + int (*clear_loopback) (struct qlcnic_adapter *, u8); + int (*config_promisc_mode) (struct qlcnic_adapter *, u32); +- void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16); ++ void (*change_l2_filter)(struct qlcnic_adapter *adapter, u64 *addr, ++ u16 vlan, struct qlcnic_host_tx_ring *tx_ring); + int (*get_board_info) (struct qlcnic_adapter *); + void (*set_mac_filter_count) (struct qlcnic_adapter *); + void (*free_mac_list) (struct qlcnic_adapter *); +@@ -2064,9 +2065,10 @@ static inline int qlcnic_nic_set_promisc + } + + static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter, +- u64 *addr, u16 id) ++ u64 *addr, u16 vlan, ++ struct qlcnic_host_tx_ring *tx_ring) + { +- adapter->ahw->hw_ops->change_l2_filter(adapter, addr, 
id); ++ adapter->ahw->hw_ops->change_l2_filter(adapter, addr, vlan, tx_ring); + } + + static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter) +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +@@ -2135,7 +2135,8 @@ out: + } + + void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, +- u16 vlan_id) ++ u16 vlan_id, ++ struct qlcnic_host_tx_ring *tx_ring) + { + u8 mac[ETH_ALEN]; + memcpy(&mac, addr, ETH_ALEN); +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +@@ -550,7 +550,8 @@ int qlcnic_83xx_wrt_reg_indirect(struct + int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32); + int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int); + int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int); +-void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16); ++void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, ++ u16 vlan, struct qlcnic_host_tx_ring *ring); + int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *); + int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *); + void qlcnic_83xx_initialize_nic(struct qlcnic_adapter *, int); +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h +@@ -173,7 +173,8 @@ int qlcnic_82xx_napi_add(struct qlcnic_a + struct net_device *netdev); + void qlcnic_82xx_get_beacon_state(struct qlcnic_adapter *); + void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, +- u64 *uaddr, u16 vlan_id); ++ u64 *uaddr, u16 vlan_id, ++ struct qlcnic_host_tx_ring *tx_ring); + int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *, + struct ethtool_coalesce *); + int qlcnic_82xx_set_rx_coalesce(struct qlcnic_adapter *); +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c 
+@@ -268,13 +268,12 @@ static void qlcnic_add_lb_filter(struct + } + + void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, +- u16 vlan_id) ++ u16 vlan_id, struct qlcnic_host_tx_ring *tx_ring) + { + struct cmd_desc_type0 *hwdesc; + struct qlcnic_nic_req *req; + struct qlcnic_mac_req *mac_req; + struct qlcnic_vlan_req *vlan_req; +- struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring; + u32 producer; + u64 word; + +@@ -301,7 +300,8 @@ void qlcnic_82xx_change_filter(struct ql + + static void qlcnic_send_filter(struct qlcnic_adapter *adapter, + struct cmd_desc_type0 *first_desc, +- struct sk_buff *skb) ++ struct sk_buff *skb, ++ struct qlcnic_host_tx_ring *tx_ring) + { + struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data); + struct ethhdr *phdr = (struct ethhdr *)(skb->data); +@@ -335,7 +335,7 @@ static void qlcnic_send_filter(struct ql + tmp_fil->vlan_id == vlan_id) { + if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) + qlcnic_change_filter(adapter, &src_addr, +- vlan_id); ++ vlan_id, tx_ring); + tmp_fil->ftime = jiffies; + return; + } +@@ -350,7 +350,7 @@ static void qlcnic_send_filter(struct ql + if (!fil) + return; + +- qlcnic_change_filter(adapter, &src_addr, vlan_id); ++ qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring); + fil->ftime = jiffies; + fil->vlan_id = vlan_id; + memcpy(fil->faddr, &src_addr, ETH_ALEN); +@@ -766,7 +766,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_ + } + + if (adapter->drv_mac_learn) +- qlcnic_send_filter(adapter, first_desc, skb); ++ qlcnic_send_filter(adapter, first_desc, skb, tx_ring); + + tx_ring->tx_stats.tx_bytes += skb->len; + tx_ring->tx_stats.xmit_called++; diff --git a/queue-4.18/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch b/queue-4.18/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch new file mode 100644 index 00000000000..476e94e2658 --- /dev/null +++ 
b/queue-4.18/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch @@ -0,0 +1,30 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Giacinto Cifelli +Date: Wed, 10 Oct 2018 20:05:53 +0200 +Subject: qmi_wwan: Added support for Gemalto's Cinterion ALASxx WWAN interface + +From: Giacinto Cifelli + +[ Upstream commit 4f7617705bfff84d756fe4401a1f4f032f374984 ] + +Added support for Gemalto's Cinterion ALASxx WWAN interfaces +by adding QMI_FIXED_INTF with Cinterion's VID and PID. + +Signed-off-by: Giacinto Cifelli +Acked-by: Bjørn Mork +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -1234,6 +1234,7 @@ static const struct usb_device_id produc + {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ + {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ + {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ ++ {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */ + {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ + {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ + {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ diff --git a/queue-4.18/r8169-fix-network-stalls-due-to-missing-bit-txcfg_auto_fifo.patch b/queue-4.18/r8169-fix-network-stalls-due-to-missing-bit-txcfg_auto_fifo.patch new file mode 100644 index 00000000000..7ab5680dadd --- /dev/null +++ b/queue-4.18/r8169-fix-network-stalls-due-to-missing-bit-txcfg_auto_fifo.patch @@ -0,0 +1,104 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Heiner Kallweit +Date: Fri, 28 Sep 2018 23:51:54 +0200 +Subject: r8169: fix network stalls due to missing bit TXCFG_AUTO_FIFO + +From: Heiner Kallweit + +[ Upstream commit ad5f97faff4231e72b96bd96adbe1b6e977a9b86 ] + +Some of the chip-specific hw_start functions set bit 
TXCFG_AUTO_FIFO +in register TxConfig. The original patch changed the order of some +calls resulting in these changes being overwritten by +rtl_set_tx_config_registers() in rtl_hw_start(). This eventually +resulted in network stalls especially under high load. + +Analyzing the chip-specific hw_start functions all chip version from +34, with the exception of version 39, need this bit set. +This patch moves setting this bit to rtl_set_tx_config_registers(). + +Fixes: 4fd48c4ac0a0 ("r8169: move common initializations to tp->hw_start") +Reported-by: Ortwin Glück +Reported-by: David Arendt +Root-caused-by: Maciej S. Szmigiero +Tested-by: Tony Atkinson +Tested-by: David Arendt +Tested-by: Ortwin Glück +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169.c | 20 ++++++++------------ + 1 file changed, 8 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -5041,9 +5041,14 @@ static void rtl8169_hw_reset(struct rtl8 + + static void rtl_set_tx_config_registers(struct rtl8169_private *tp) + { +- /* Set DMA burst size and Interframe Gap Time */ +- RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) | +- (InterFrameGap << TxInterFrameGapShift)); ++ u32 val = TX_DMA_BURST << TxDMAShift | ++ InterFrameGap << TxInterFrameGapShift; ++ ++ if (tp->mac_version >= RTL_GIGA_MAC_VER_34 && ++ tp->mac_version != RTL_GIGA_MAC_VER_39) ++ val |= TXCFG_AUTO_FIFO; ++ ++ RTL_W32(tp, TxConfig, val); + } + + static void rtl_set_rx_max_size(struct rtl8169_private *tp) +@@ -5530,7 +5535,6 @@ static void rtl_hw_start_8168e_2(struct + + rtl_disable_clock_request(tp); + +- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); + RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); + + /* Adjust EEE LED frequency */ +@@ -5562,7 +5566,6 @@ static void rtl_hw_start_8168f(struct rt + + rtl_disable_clock_request(tp); + +- RTL_W32(tp, TxConfig, 
RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); + RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); + RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); + RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); +@@ -5607,8 +5610,6 @@ static void rtl_hw_start_8411(struct rtl + + static void rtl_hw_start_8168g(struct rtl8169_private *tp) + { +- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); +- + rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC); + rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC); + rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC); +@@ -5707,8 +5708,6 @@ static void rtl_hw_start_8168h_1(struct + RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en); + rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1)); + +- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); +- + rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC); + rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC); + rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC); +@@ -5789,8 +5788,6 @@ static void rtl_hw_start_8168ep(struct r + { + rtl8168ep_stop_cmac(tp); + +- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); +- + rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC); + rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC); + rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x5f, ERIAR_EXGMAC); +@@ -6108,7 +6105,6 @@ static void rtl_hw_start_8402(struct rtl + /* Force LAN exit from ASPM if Rx/Tx are not idle */ + RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800); + +- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); + RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); + + rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402)); diff --git a/queue-4.18/r8169-set-rx_multi_en-bit-in-rxconfig-for-8168f-family-chips.patch b/queue-4.18/r8169-set-rx_multi_en-bit-in-rxconfig-for-8168f-family-chips.patch new file mode 100644 index 00000000000..142357afbe1 --- 
/dev/null +++ b/queue-4.18/r8169-set-rx_multi_en-bit-in-rxconfig-for-8168f-family-chips.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: "Maciej S. Szmigiero" +Date: Thu, 11 Oct 2018 16:02:10 +0200 +Subject: r8169: set RX_MULTI_EN bit in RxConfig for 8168F-family chips + +From: "Maciej S. Szmigiero" + +[ Upstream commit 511cfd580f23b0e0fcd5659931ef14c6e2c062b0 ] + +It has been reported that since +commit 05212ba8132b42 ("r8169: set RxConfig after tx/rx is enabled for RTL8169sb/8110sb devices") +at least RTL_GIGA_MAC_VER_38 NICs work erratically after a resume from +suspend. +The problem has been traced to a missing RX_MULTI_EN bit in the RxConfig +register. +We already set this bit for RTL_GIGA_MAC_VER_35 NICs of the same 8168F +chip family so let's do it also for its other siblings: RTL_GIGA_MAC_VER_36 +and RTL_GIGA_MAC_VER_38. + +Curiously, the NIC seems to work fine after a system boot without having +this bit set as long as the system isn't suspended and resumed. + +Fixes: 05212ba8132b42 ("r8169: set RxConfig after tx/rx is enabled for RTL8169sb/8110sb devices") +Reported-by: Chris Clayton +Signed-off-by: Maciej S. Szmigiero +Reviewed-by: Heiner Kallweit +Tested-by: Chris Clayton +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -4788,8 +4788,8 @@ static void rtl_init_rxcfg(struct rtl816 + RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST); + break; + case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24: +- case RTL_GIGA_MAC_VER_34: +- case RTL_GIGA_MAC_VER_35: ++ case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_36: ++ case RTL_GIGA_MAC_VER_38: + RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); + break; + case RTL_GIGA_MAC_VER_40 ... 
RTL_GIGA_MAC_VER_51: diff --git a/queue-4.18/rtnetlink-fail-dump-if-target-netnsid-is-invalid.patch b/queue-4.18/rtnetlink-fail-dump-if-target-netnsid-is-invalid.patch new file mode 100644 index 00000000000..0fc861c0a4f --- /dev/null +++ b/queue-4.18/rtnetlink-fail-dump-if-target-netnsid-is-invalid.patch @@ -0,0 +1,36 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: David Ahern +Date: Fri, 28 Sep 2018 12:28:41 -0700 +Subject: rtnetlink: Fail dump if target netnsid is invalid + +From: David Ahern + +[ Upstream commit 893626d6a353d1356528f94e081246ecf233d77a ] + +Link dumps can return results from a target namespace. If the namespace id +is invalid, then the dump request should fail if get_target_net fails +rather than continuing with a dump of the current namespace. + +Fixes: 79e1ad148c844 ("rtnetlink: use netnsid to query interface") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1848,10 +1848,8 @@ static int rtnl_dump_ifinfo(struct sk_bu + if (tb[IFLA_IF_NETNSID]) { + netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); + tgt_net = get_target_net(skb->sk, netnsid); +- if (IS_ERR(tgt_net)) { +- tgt_net = net; +- netnsid = -1; +- } ++ if (IS_ERR(tgt_net)) ++ return PTR_ERR(tgt_net); + } + + if (tb[IFLA_EXT_MASK]) diff --git a/queue-4.18/rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch b/queue-4.18/rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch new file mode 100644 index 00000000000..a1ccf01d052 --- /dev/null +++ b/queue-4.18/rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch @@ -0,0 +1,167 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Mauricio Faria de Oliveira +Date: Mon, 1 Oct 2018 22:46:40 -0300 +Subject: rtnetlink: fix rtnl_fdb_dump() for ndmsg header + +From: Mauricio Faria de Oliveira + +[ Upstream commit bd961c9bc66497f0c63f4ba1d02900bb85078366 ] + 
+Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg', +which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh'). + +The problem is, the function bails out early if nlmsg_parse() fails, which +does occur for iproute2 usage of 'struct ndmsg' because the payload length +is shorter than the family header alone (as 'struct ifinfomsg' is assumed). + +This breaks backward compatibility with userspace -- nothing is sent back. + +Some examples with iproute2 and netlink library for go [1]: + + 1) $ bridge fdb show + 33:33:00:00:00:01 dev ens3 self permanent + 01:00:5e:00:00:01 dev ens3 self permanent + 33:33:ff:15:98:30 dev ens3 self permanent + + This one works, as it uses 'struct ifinfomsg'. + + fdb_show() @ iproute2/bridge/fdb.c + """ + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + ... + if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...] + """ + + 2) $ ip --family bridge neigh + RTNETLINK answers: Invalid argument + Dump terminated + + This one fails, as it uses 'struct ndmsg'. + + do_show_or_flush() @ iproute2/ip/ipneigh.c + """ + .n.nlmsg_type = RTM_GETNEIGH, + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), + """ + + 3) $ ./neighlist + < no output > + + This one fails, as it uses 'struct ndmsg'-based. + + neighList() @ netlink/neigh_linux.go + """ + req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...] + msg := Ndmsg{ + """ + +The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink: +bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails +if the payload length (with the _actual_ family header) is less than the +family header length alone (which is assumed, in parameter 'hdrlen'). +This is true in the examples above with struct ndmsg, with size and payload +length shorter than struct ifinfomsg. 
+ +However, that commit just intends to fix something under the assumption the +family header is indeed an 'struct ifinfomsg' - by preventing access to the +payload as such (via 'ifm' pointer) if the payload length is not sufficient +to actually contain it. + +The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump +interface at par with brctl"), to support iproute2's 'bridge fdb' command +(not 'ip neigh') which indeed uses 'struct ifinfomsg', thus is not broken. + +So, in order to unbreak the 'struct ndmsg' family headers and still allow +'struct ifinfomsg' to continue to work, check for the known message sizes +used with 'struct ndmsg' in iproute2 (with zero or one attribute which is +not used in this function anyway) then do not parse the data as ifinfomsg. + +Same examples with this patch applied (or revert/before the original fix): + + $ bridge fdb show + 33:33:00:00:00:01 dev ens3 self permanent + 01:00:5e:00:00:01 dev ens3 self permanent + 33:33:ff:15:98:30 dev ens3 self permanent + + $ ip --family bridge neigh + dev ens3 lladdr 33:33:00:00:00:01 PERMANENT + dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT + dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT + + $ ./neighlist + netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} + netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} + netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} + +Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068). 
+ +References: + +[1] netlink library for go (test-case) + https://github.com/vishvananda/netlink + + $ cat ~/go/src/neighlist/main.go + package main + import ("fmt"; "syscall"; "github.com/vishvananda/netlink") + func main() { + neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE) + for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) } + } + + $ export GOPATH=~/go + $ go get github.com/vishvananda/netlink + $ go build neighlist + $ ~/go/src/neighlist/neighlist + +Thanks to David Ahern for suggestions to improve this patch. + +Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error") +Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl") +Reported-by: Aidan Obley +Signed-off-by: Mauricio Faria de Oliveira +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 29 ++++++++++++++++++++--------- + 1 file changed, 20 insertions(+), 9 deletions(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -3694,16 +3694,27 @@ static int rtnl_fdb_dump(struct sk_buff + int err = 0; + int fidx = 0; + +- err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, +- IFLA_MAX, ifla_policy, NULL); +- if (err < 0) { +- return -EINVAL; +- } else if (err == 0) { +- if (tb[IFLA_MASTER]) +- br_idx = nla_get_u32(tb[IFLA_MASTER]); +- } ++ /* A hack to preserve kernel<->userspace interface. ++ * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0. ++ * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails. ++ * So, check for ndmsg with an optional u32 attribute (not used here). ++ * Fortunately these sizes don't conflict with the size of ifinfomsg ++ * with an optional attribute. 
++ */ ++ if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) && ++ (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) + ++ nla_attr_size(sizeof(u32)))) { ++ err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, ++ IFLA_MAX, ifla_policy, NULL); ++ if (err < 0) { ++ return -EINVAL; ++ } else if (err == 0) { ++ if (tb[IFLA_MASTER]) ++ br_idx = nla_get_u32(tb[IFLA_MASTER]); ++ } + +- brport_idx = ifm->ifi_index; ++ brport_idx = ifm->ifi_index; ++ } + + if (br_idx) { + br_dev = __dev_get_by_index(net, br_idx); diff --git a/queue-4.18/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch b/queue-4.18/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch new file mode 100644 index 00000000000..a506ef717e7 --- /dev/null +++ b/queue-4.18/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch @@ -0,0 +1,54 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Eric Dumazet +Date: Tue, 2 Oct 2018 15:47:35 -0700 +Subject: rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096 + +From: Eric Dumazet + +[ Upstream commit 0e1d6eca5113858ed2caea61a5adc03c595f6096 ] + +We have an impressive number of syzkaller bugs that are linked +to the fact that syzbot was able to create a networking device +with millions of TX (or RX) queues. + +Let's limit the number of RX/TX queues to 4096, this really should +cover all known cases. + +A separate patch will add various cond_resched() in the loops +handling sysfs entries at device creation and dismantle. + +Tested: + +lpaa6:~# ip link add gre-4097 numtxqueues 4097 numrxqueues 4097 type ip6gretap +RTNETLINK answers: Invalid argument + +lpaa6:~# time ip link add gre-4096 numtxqueues 4096 numrxqueues 4096 type ip6gretap + +real 0m0.180s +user 0m0.000s +sys 0m0.107s + +Fixes: 76ff5cc91935 ("rtnl: allow to specify number of rx and tx queues on device creation") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -2787,6 +2787,12 @@ struct net_device *rtnl_create_link(stru + else if (ops->get_num_rx_queues) + num_rx_queues = ops->get_num_rx_queues(); + ++ if (num_tx_queues < 1 || num_tx_queues > 4096) ++ return ERR_PTR(-EINVAL); ++ ++ if (num_rx_queues < 1 || num_rx_queues > 4096) ++ return ERR_PTR(-EINVAL); ++ + dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, + ops->setup, num_tx_queues, num_rx_queues); + if (!dev) diff --git a/queue-4.18/sctp-update-dst-pmtu-with-the-correct-daddr.patch b/queue-4.18/sctp-update-dst-pmtu-with-the-correct-daddr.patch new file mode 100644 index 00000000000..773c87d8cc6 --- /dev/null +++ b/queue-4.18/sctp-update-dst-pmtu-with-the-correct-daddr.patch @@ -0,0 +1,65 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Xin Long +Date: Thu, 20 Sep 2018 17:27:28 +0800 +Subject: sctp: update dst pmtu with the correct daddr + +From: Xin Long + +[ Upstream commit d7ab5cdce54da631f0c8c11e506c974536a3581e ] + +When processing pmtu update from an icmp packet, it calls .update_pmtu +with sk instead of skb in sctp_transport_update_pmtu. + +However for sctp, the daddr in the transport might be different from +inet_sock->inet_daddr or sk->sk_v6_daddr, which is used to update or +create the route cache. The incorrect daddr will cause a different +route cache created for the path. + +So before calling .update_pmtu, inet_sock->inet_daddr/sk->sk_v6_daddr +should be updated with the daddr in the transport, and update it back +after it's done. + +The issue has existed since route exceptions introduction. + +Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions.") +Reported-by: ian.periam@dialogic.com +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/transport.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/net/sctp/transport.c ++++ b/net/sctp/transport.c +@@ -260,6 +260,7 @@ void sctp_transport_pmtu(struct sctp_tra + bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) + { + struct dst_entry *dst = sctp_transport_dst_check(t); ++ struct sock *sk = t->asoc->base.sk; + bool change = true; + + if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { +@@ -271,12 +272,19 @@ bool sctp_transport_update_pmtu(struct s + pmtu = SCTP_TRUNC4(pmtu); + + if (dst) { +- dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); ++ struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family); ++ union sctp_addr addr; ++ ++ pf->af->from_sk(&addr, sk); ++ pf->to_sk_daddr(&t->ipaddr, sk); ++ dst->ops->update_pmtu(dst, sk, NULL, pmtu); ++ pf->to_sk_daddr(&addr, sk); ++ + dst = sctp_transport_dst_check(t); + } + + if (!dst) { +- t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); ++ t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); + dst = t->dst; + } + diff --git a/queue-4.18/series b/queue-4.18/series new file mode 100644 index 00000000000..d306c799c6d --- /dev/null +++ b/queue-4.18/series @@ -0,0 +1,62 @@ +bnxt_en-fix-tx-timeout-during-netpoll.patch +bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch +bonding-avoid-possible-dead-lock.patch +ip6_tunnel-be-careful-when-accessing-the-inner-header.patch +ip_tunnel-be-careful-when-accessing-the-inner-header.patch +ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch +ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch +net-dsa-bcm_sf2-call-setup-during-switch-resume.patch +net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch +net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch +net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch +netlabel-check-for-ipv4mask-in-addrinfo_get.patch 
+net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch +net-mvpp2-fix-a-txq_done-race-condition.patch +net-sched-add-policy-validation-for-tc-attributes.patch +net-sched-cls_u32-fix-hnode-refcounting.patch +net-systemport-fix-wake-up-interrupt-race-during-resume.patch +net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch +qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch +qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch +rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch +rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch +sctp-update-dst-pmtu-with-the-correct-daddr.patch +team-forbid-enslaving-team-device-to-itself.patch +tipc-fix-flow-control-accounting-for-implicit-connect.patch +udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch +net-qualcomm-rmnet-skip-processing-loopback-packets.patch +net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-transmit.patch +net-qualcomm-rmnet-fix-incorrect-allocation-flag-in-receive-path.patch +tun-remove-unused-parameters.patch +tun-initialize-napi_mutex-unconditionally.patch +tun-napi-flags-belong-to-tfile.patch +net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch +net-packet-fix-packet-drop-as-of-virtio-gso.patch +net-dsa-bcm_sf2-fix-unbind-ordering.patch +net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch +net-aquantia-memory-corruption-on-jumbo-frames.patch +net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch +bonding-pass-link-local-packets-to-bonding-master-also.patch +bonding-fix-warning-message.patch +net-stmmac-rework-coalesce-timer-and-fix-multi-queue-races.patch +nfp-avoid-soft-lockups-under-control-message-storm.patch +bnxt_en-don-t-try-to-offload-vlan-modify-action.patch +net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch +net-phy-phylink-fix-sfp-interface-autodetection.patch +sfp-fix-oops-with-ethtool-m.patch 
+tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch +inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch +net-dsa-b53-keep-cpu-port-as-tagged-in-all-vlans.patch +rtnetlink-fail-dump-if-target-netnsid-is-invalid.patch +bnxt_en-fix-vnic-reservations-on-the-pf.patch +net-ipv4-don-t-let-pmtu-updates-increase-route-mtu.patch +net-mlx5-check-for-sq-and-not-rq-state-when-modifying-hairpin-sq.patch +bnxt_en-fix-enables-field-in-hwrm_queue_cos2bw_cfg-request.patch +bnxt_en-get-the-reduced-max_irqs-by-the-ones-used-by-rdma.patch +net-ipv6-remove-extra-call-to-ip6_convert_metrics-for-multipath-case.patch +net-ipv6-stop-leaking-percpu-memory-in-fib6-info.patch +net-mscc-fix-the-frame-extraction-into-the-skb.patch +qed-fix-shmem-structure-inconsistency-between-driver-and-the-mfw.patch +r8169-fix-network-stalls-due-to-missing-bit-txcfg_auto_fifo.patch +r8169-set-rx_multi_en-bit-in-rxconfig-for-8168f-family-chips.patch +vxlan-fill-ttl-inherit-info.patch diff --git a/queue-4.18/sfp-fix-oops-with-ethtool-m.patch b/queue-4.18/sfp-fix-oops-with-ethtool-m.patch new file mode 100644 index 00000000000..a6d8c5ee736 --- /dev/null +++ b/queue-4.18/sfp-fix-oops-with-ethtool-m.patch @@ -0,0 +1,71 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Russell King +Date: Tue, 18 Sep 2018 16:48:53 +0100 +Subject: sfp: fix oops with ethtool -m + +From: Russell King + +[ Upstream commit 126d6848ef13958e1cb959e96c21d19bc498ade9 ] + +If a network interface is created prior to the SFP socket being +available, ethtool can request module information. 
This unfortunately +leads to an oops: + +Unable to handle kernel NULL pointer dereference at virtual address 00000008 +pgd = (ptrval) +[00000008] *pgd=7c400831, *pte=00000000, *ppte=00000000 +Internal error: Oops: 17 [#1] SMP ARM +Modules linked in: +CPU: 0 PID: 1480 Comm: ethtool Not tainted 4.19.0-rc3 #138 +Hardware name: Broadcom Northstar Plus SoC +PC is at sfp_get_module_info+0x8/0x10 +LR is at dev_ethtool+0x218c/0x2afc + +Fix this by not filling in the network device's SFP bus pointer until +SFP is fully bound, thereby avoiding the core calling into the SFP bus +code. + +Fixes: ce0aa27ff3f6 ("sfp: add sfp-bus to bridge between network devices and sfp cages") +Reported-by: Florian Fainelli +Tested-by: Florian Fainelli +Signed-off-by: Russell King +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/sfp-bus.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/phy/sfp-bus.c ++++ b/drivers/net/phy/sfp-bus.c +@@ -349,6 +349,7 @@ static int sfp_register_bus(struct sfp_b + } + if (bus->started) + bus->socket_ops->start(bus->sfp); ++ bus->netdev->sfp_bus = bus; + bus->registered = true; + return 0; + } +@@ -357,6 +358,7 @@ static void sfp_unregister_bus(struct sf + { + const struct sfp_upstream_ops *ops = bus->upstream_ops; + ++ bus->netdev->sfp_bus = NULL; + if (bus->registered) { + if (bus->started) + bus->socket_ops->stop(bus->sfp); +@@ -438,7 +440,6 @@ static void sfp_upstream_clear(struct sf + { + bus->upstream_ops = NULL; + bus->upstream = NULL; +- bus->netdev->sfp_bus = NULL; + bus->netdev = NULL; + } + +@@ -467,7 +468,6 @@ struct sfp_bus *sfp_register_upstream(st + bus->upstream_ops = ops; + bus->upstream = upstream; + bus->netdev = ndev; +- ndev->sfp_bus = bus; + + if (bus->sfp) { + ret = sfp_register_bus(bus); diff --git a/queue-4.18/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch b/queue-4.18/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch new file mode 100644 index 
00000000000..e4d19179a86 --- /dev/null +++ b/queue-4.18/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch @@ -0,0 +1,85 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Eric Dumazet +Date: Mon, 1 Oct 2018 15:02:26 -0700 +Subject: tcp/dccp: fix lockdep issue when SYN is backlogged + +From: Eric Dumazet + +[ Upstream commit 1ad98e9d1bdf4724c0a8532fabd84bf3c457c2bc ] + +In normal SYN processing, packets are handled without listener +lock and in RCU protected ingress path. + +But syzkaller is known to be able to trick us and SYN +packets might be processed in process context, after being +queued into socket backlog. + +In commit 06f877d613be ("tcp/dccp: fix other lockdep splats +accessing ireq_opt") I made a very stupid fix, that happened +to work mostly because of the regular path being RCU protected. + +Really the thing protecting ireq->ireq_opt is RCU read lock, +and the pseudo request refcnt is not relevant. + +This patch extends what I did in commit 449809a66c1d ("tcp/dccp: +block BH for SYN processing") by adding an extra rcu_read_{lock|unlock} +pair in the paths that might be taken when processing SYN from +socket backlog (thus possibly in process context) + +Fixes: 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 3 +-- + net/dccp/input.c | 4 +++- + net/ipv4/tcp_input.c | 4 +++- + 3 files changed, 7 insertions(+), 4 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -132,8 +132,7 @@ static inline int inet_request_bound_dev + + static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) + { +- return rcu_dereference_check(ireq->ireq_opt, +- refcount_read(&ireq->req.rsk_refcnt) > 0); ++ return rcu_dereference(ireq->ireq_opt); + } + + struct inet_cork { +--- a/net/dccp/input.c ++++ b/net/dccp/input.c +@@ -606,11 +606,13 @@ int dccp_rcv_state_process(struct sock * + if (sk->sk_state == DCCP_LISTEN) { + if (dh->dccph_type == DCCP_PKT_REQUEST) { + /* It is possible that we process SYN packets from backlog, +- * so we need to make sure to disable BH right there. ++ * so we need to make sure to disable BH and RCU right there. + */ ++ rcu_read_lock(); + local_bh_disable(); + acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); ++ rcu_read_unlock(); + if (!acceptable) + return 1; + consume_skb(skb); +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5976,11 +5976,13 @@ int tcp_rcv_state_process(struct sock *s + if (th->fin) + goto discard; + /* It is possible that we process SYN packets from backlog, +- * so we need to make sure to disable BH right there. ++ * so we need to make sure to disable BH and RCU right there. 
+ */ ++ rcu_read_lock(); + local_bh_disable(); + acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); ++ rcu_read_unlock(); + + if (!acceptable) + return 1; diff --git a/queue-4.18/team-forbid-enslaving-team-device-to-itself.patch b/queue-4.18/team-forbid-enslaving-team-device-to-itself.patch new file mode 100644 index 00000000000..ba645a50910 --- /dev/null +++ b/queue-4.18/team-forbid-enslaving-team-device-to-itself.patch @@ -0,0 +1,125 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Ido Schimmel +Date: Mon, 1 Oct 2018 12:21:59 +0300 +Subject: team: Forbid enslaving team device to itself + +From: Ido Schimmel + +[ Upstream commit 471b83bd8bbe4e89743683ef8ecb78f7029d8288 ] + +team's ndo_add_slave() acquires 'team->lock' and later tries to open the +newly enslaved device via dev_open(). This emits a 'NETDEV_UP' event +that causes the VLAN driver to add VLAN 0 on the team device. team's +ndo_vlan_rx_add_vid() will also try to acquire 'team->lock' and +deadlock. + +Fix this by checking early at the enslavement function that a team +device is not being enslaved to itself. + +A similar check was added to the bond driver in commit 09a89c219baf +("bonding: disallow enslaving a bond to itself"). 
+ +WARNING: possible recursive locking detected +4.18.0-rc7+ #176 Not tainted +-------------------------------------------- +syz-executor4/6391 is trying to acquire lock: +(____ptrval____) (&team->lock){+.+.}, at: team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 + +but task is already holding lock: +(____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 + +other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(&team->lock); + lock(&team->lock); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + +2 locks held by syz-executor4/6391: + #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] + #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4662 + #1: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 + +stack backtrace: +CPU: 1 PID: 6391 Comm: syz-executor4 Not tainted 4.18.0-rc7+ #176 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 + print_deadlock_bug kernel/locking/lockdep.c:1765 [inline] + check_deadlock kernel/locking/lockdep.c:1809 [inline] + validate_chain kernel/locking/lockdep.c:2405 [inline] + __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435 + lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924 + __mutex_lock_common kernel/locking/mutex.c:757 [inline] + __mutex_lock+0x176/0x1820 kernel/locking/mutex.c:894 + mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:909 + team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 + vlan_add_rx_filter_info+0x14a/0x1d0 net/8021q/vlan_core.c:210 + __vlan_vid_add net/8021q/vlan_core.c:278 [inline] + vlan_vid_add+0x63e/0x9d0 net/8021q/vlan_core.c:308 + vlan_device_event.cold.12+0x2a/0x2f net/8021q/vlan.c:381 + 
notifier_call_chain+0x180/0x390 kernel/notifier.c:93 + __raw_notifier_call_chain kernel/notifier.c:394 [inline] + raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 + call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735 + call_netdevice_notifiers net/core/dev.c:1753 [inline] + dev_open+0x173/0x1b0 net/core/dev.c:1433 + team_port_add drivers/net/team/team.c:1219 [inline] + team_add_slave+0xa8b/0x1c30 drivers/net/team/team.c:1948 + do_set_master+0x1c9/0x220 net/core/rtnetlink.c:2248 + do_setlink+0xba4/0x3e10 net/core/rtnetlink.c:2382 + rtnl_setlink+0x2a9/0x400 net/core/rtnetlink.c:2636 + rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4665 + netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2455 + rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4683 + netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] + netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 + netlink_sendmsg+0xa18/0xfd0 net/netlink/af_netlink.c:1908 + sock_sendmsg_nosec net/socket.c:642 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:652 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2126 + __sys_sendmsg+0x11d/0x290 net/socket.c:2164 + __do_sys_sendmsg net/socket.c:2173 [inline] + __se_sys_sendmsg net/socket.c:2171 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2171 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x456b29 +Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f9706bf8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f9706bf96d4 RCX: 0000000000456b29 +RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000004 +RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 00000000004d3548 R14: 00000000004c8227 R15: 
0000000000000000 + +Fixes: 87002b03baab ("net: introduce vlan_vid_[add/del] and use them instead of direct [add/kill]_vid ndo calls") +Signed-off-by: Ido Schimmel +Reported-and-tested-by: syzbot+bd051aba086537515cdb@syzkaller.appspotmail.com +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1172,6 +1172,12 @@ static int team_port_add(struct team *te + return -EBUSY; + } + ++ if (dev == port_dev) { ++ NL_SET_ERR_MSG(extack, "Cannot enslave team device to itself"); ++ netdev_err(dev, "Cannot enslave team device to itself\n"); ++ return -EINVAL; ++ } ++ + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && + vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up"); diff --git a/queue-4.18/tipc-fix-flow-control-accounting-for-implicit-connect.patch b/queue-4.18/tipc-fix-flow-control-accounting-for-implicit-connect.patch new file mode 100644 index 00000000000..742f9682785 --- /dev/null +++ b/queue-4.18/tipc-fix-flow-control-accounting-for-implicit-connect.patch @@ -0,0 +1,41 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Parthasarathy Bhuvaragan +Date: Tue, 25 Sep 2018 18:21:58 +0200 +Subject: tipc: fix flow control accounting for implicit connect + +From: Parthasarathy Bhuvaragan + +[ Upstream commit 92ef12b32feab8f277b69e9fb89ede2796777f4d ] + +In the case of implicit connect message with data > 1K, the flow +control accounting is incorrect. At this state, the socket does not +know the peer nodes capability and falls back to legacy flow control +by return 1, however the receiver of this message will perform the +new block accounting. This leads to a slack and eventually traffic +disturbance. + +In this commit, we perform tipc_node_get_capabilities() at implicit +connect and perform accounting based on the peer's capability. 
+ +Signed-off-by: Parthasarathy Bhuvaragan +Signed-off-by: Jon Maloy +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/socket.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -1422,8 +1422,10 @@ static int __tipc_sendstream(struct sock + /* Handle implicit connection setup */ + if (unlikely(dest)) { + rc = __tipc_sendmsg(sock, m, dlen); +- if (dlen && (dlen == rc)) ++ if (dlen && dlen == rc) { ++ tsk->peer_caps = tipc_node_get_capabilities(net, dnode); + tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); ++ } + return rc; + } + diff --git a/queue-4.18/tun-initialize-napi_mutex-unconditionally.patch b/queue-4.18/tun-initialize-napi_mutex-unconditionally.patch new file mode 100644 index 00000000000..16eed27714a --- /dev/null +++ b/queue-4.18/tun-initialize-napi_mutex-unconditionally.patch @@ -0,0 +1,45 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Eric Dumazet +Date: Fri, 28 Sep 2018 14:51:48 -0700 +Subject: tun: initialize napi_mutex unconditionally + +From: Eric Dumazet + +[ Upstream commit c7256f579f8302ce2c038181c30060d0b40017b2 ] + +This is the first part to fix following syzbot report : + +console output: https://syzkaller.appspot.com/x/log.txt?x=145378e6400000 +kernel config: https://syzkaller.appspot.com/x/.config?x=443816db871edd66 +dashboard link: https://syzkaller.appspot.com/bug?extid=e662df0ac1d753b57e80 + +Following patch is fixing the race condition, but it seems safer +to initialize this mutex at tfile creation anyway. + +Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") +Signed-off-by: Eric Dumazet +Reported-by: syzbot+e662df0ac1d753b57e80@syzkaller.appspotmail.com +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -319,7 +319,6 @@ static void tun_napi_init(struct tun_str + netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, + NAPI_POLL_WEIGHT); + napi_enable(&tfile->napi); +- mutex_init(&tfile->napi_mutex); + } + } + +@@ -3241,6 +3240,7 @@ static int tun_chr_open(struct inode *in + return -ENOMEM; + } + ++ mutex_init(&tfile->napi_mutex); + RCU_INIT_POINTER(tfile->tun, NULL); + tfile->flags = 0; + tfile->ifindex = 0; diff --git a/queue-4.18/tun-napi-flags-belong-to-tfile.patch b/queue-4.18/tun-napi-flags-belong-to-tfile.patch new file mode 100644 index 00000000000..5868c02001b --- /dev/null +++ b/queue-4.18/tun-napi-flags-belong-to-tfile.patch @@ -0,0 +1,189 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Eric Dumazet +Date: Fri, 28 Sep 2018 14:51:49 -0700 +Subject: tun: napi flags belong to tfile + +From: Eric Dumazet + +[ Upstream commit af3fb24eecb2c59246e03c99386037fd5ad84ffd ] + +Since tun->flags might be shared by multiple tfile structures, +it is better to make sure tun_get_user() is using the flags +for the current tfile. + +Presence of the READ_ONCE() in tun_napi_frags_enabled() gave a hint +of what could happen, but we need something stronger to please +syzbot. 
+ +kasan: CONFIG_KASAN_INLINE enabled +kasan: GPF could be caused by NULL-ptr deref or user memory access +general protection fault: 0000 [#1] PREEMPT SMP KASAN +CPU: 0 PID: 13647 Comm: syz-executor5 Not tainted 4.19.0-rc5+ #59 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:dev_gro_receive+0x132/0x2720 net/core/dev.c:5427 +Code: 48 c1 ea 03 80 3c 02 00 0f 85 6e 20 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b 6e 10 49 8d bd d0 00 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 59 20 00 00 4d 8b a5 d0 00 00 00 31 ff 41 81 e4 +RSP: 0018:ffff8801c400f410 EFLAGS: 00010202 +RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8618d325 +RDX: 000000000000001a RSI: ffffffff86189f97 RDI: 00000000000000d0 +RBP: ffff8801c400f608 R08: ffff8801c8fb4300 R09: 0000000000000000 +R10: ffffed0038801ed7 R11: 0000000000000003 R12: ffff8801d327d358 +R13: 0000000000000000 R14: ffff8801c16dd8c0 R15: 0000000000000004 +FS: 00007fe003615700(0000) GS:ffff8801dac00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007fe1f3c43db8 CR3: 00000001bebb2000 CR4: 00000000001406f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + napi_gro_frags+0x3f4/0xc90 net/core/dev.c:5715 + tun_get_user+0x31d5/0x42a0 drivers/net/tun.c:1922 + tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1967 + call_write_iter include/linux/fs.h:1808 [inline] + new_sync_write fs/read_write.c:474 [inline] + __vfs_write+0x6b8/0x9f0 fs/read_write.c:487 + vfs_write+0x1fc/0x560 fs/read_write.c:549 + ksys_write+0x101/0x260 fs/read_write.c:598 + __do_sys_write fs/read_write.c:610 [inline] + __se_sys_write fs/read_write.c:607 [inline] + __x64_sys_write+0x73/0xb0 fs/read_write.c:607 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x457579 +Code: 1d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 
66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007fe003614c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457579 +RDX: 0000000000000012 RSI: 0000000020000000 RDI: 000000000000000a +RBP: 000000000072c040 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe0036156d4 +R13: 00000000004c5574 R14: 00000000004d8e98 R15: 00000000ffffffff +Modules linked in: + +RIP: 0010:dev_gro_receive+0x132/0x2720 net/core/dev.c:5427 +Code: 48 c1 ea 03 80 3c 02 00 0f 85 6e 20 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b 6e 10 49 8d bd d0 00 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 59 20 00 00 4d 8b a5 d0 00 00 00 31 ff 41 81 e4 +RSP: 0018:ffff8801c400f410 EFLAGS: 00010202 +RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8618d325 +RDX: 000000000000001a RSI: ffffffff86189f97 RDI: 00000000000000d0 +RBP: ffff8801c400f608 R08: ffff8801c8fb4300 R09: 0000000000000000 +R10: ffffed0038801ed7 R11: 0000000000000003 R12: ffff8801d327d358 +R13: 0000000000000000 R14: ffff8801c16dd8c0 R15: 0000000000000004 +FS: 00007fe003615700(0000) GS:ffff8801dac00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007fe1f3c43db8 CR3: 00000001bebb2000 CR4: 00000000001406f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + +Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 29 ++++++++++++++++------------- + 1 file changed, 16 insertions(+), 13 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -181,6 +181,7 @@ struct tun_file { + }; + struct napi_struct napi; + bool napi_enabled; ++ bool napi_frags_enabled; + struct mutex napi_mutex; /* Protects access to the above napi */ + struct list_head next; + struct tun_struct *detached; +@@ -312,9 +313,10 @@ static int tun_napi_poll(struct napi_str + } + + static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, +- bool napi_en) ++ bool napi_en, bool napi_frags) + { + tfile->napi_enabled = napi_en; ++ tfile->napi_frags_enabled = napi_en && napi_frags; + if (napi_en) { + netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, + NAPI_POLL_WEIGHT); +@@ -334,9 +336,9 @@ static void tun_napi_del(struct tun_file + netif_napi_del(&tfile->napi); + } + +-static bool tun_napi_frags_enabled(const struct tun_struct *tun) ++static bool tun_napi_frags_enabled(const struct tun_file *tfile) + { +- return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS; ++ return tfile->napi_frags_enabled; + } + + #ifdef CONFIG_TUN_VNET_CROSS_LE +@@ -790,7 +792,7 @@ static void tun_detach_all(struct net_de + } + + static int tun_attach(struct tun_struct *tun, struct file *file, +- bool skip_filter, bool napi) ++ bool skip_filter, bool napi, bool napi_frags) + { + struct tun_file *tfile = file->private_data; + struct net_device *dev = tun->dev; +@@ -863,7 +865,7 @@ static int tun_attach(struct tun_struct + tun_enable_queue(tfile); + } else { + sock_hold(&tfile->sk); +- tun_napi_init(tun, tfile, napi); ++ tun_napi_init(tun, tfile, napi, napi_frags); + } + + tun_set_real_num_queues(tun); +@@ -1173,13 +1175,11 @@ static void tun_poll_controller(struct n + struct tun_file *tfile; + int i; + +- if (tun_napi_frags_enabled(tun)) +- return; +- + rcu_read_lock(); + for (i = 0; i < tun->numqueues; i++) { + tfile = 
rcu_dereference(tun->tfiles[i]); +- if (tfile->napi_enabled) ++ if (!tun_napi_frags_enabled(tfile) && ++ tfile->napi_enabled) + napi_schedule(&tfile->napi); + } + rcu_read_unlock(); +@@ -1750,7 +1750,7 @@ static ssize_t tun_get_user(struct tun_s + int err; + u32 rxhash = 0; + int skb_xdp = 1; +- bool frags = tun_napi_frags_enabled(tun); ++ bool frags = tun_napi_frags_enabled(tfile); + + if (!(tun->dev->flags & IFF_UP)) + return -EIO; +@@ -2575,7 +2575,8 @@ static int tun_set_iff(struct net *net, + return err; + + err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, +- ifr->ifr_flags & IFF_NAPI); ++ ifr->ifr_flags & IFF_NAPI, ++ ifr->ifr_flags & IFF_NAPI_FRAGS); + if (err < 0) + return err; + +@@ -2673,7 +2674,8 @@ static int tun_set_iff(struct net *net, + (ifr->ifr_flags & TUN_FEATURES); + + INIT_LIST_HEAD(&tun->disabled); +- err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI); ++ err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, ++ ifr->ifr_flags & IFF_NAPI_FRAGS); + if (err < 0) + goto err_free_flow; + +@@ -2822,7 +2824,8 @@ static int tun_set_queue(struct file *fi + ret = security_tun_dev_attach_queue(tun->security); + if (ret < 0) + goto unlock; +- ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI); ++ ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI, ++ tun->flags & IFF_NAPI_FRAGS); + } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { + tun = rtnl_dereference(tfile->tun); + if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) diff --git a/queue-4.18/tun-remove-unused-parameters.patch b/queue-4.18/tun-remove-unused-parameters.patch new file mode 100644 index 00000000000..cce0df44593 --- /dev/null +++ b/queue-4.18/tun-remove-unused-parameters.patch @@ -0,0 +1,66 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Eric Dumazet +Date: Fri, 28 Sep 2018 14:51:47 -0700 +Subject: tun: remove unused parameters + +From: Eric Dumazet + +[ Upstream commit 06e55addd3f40b5294e448c2cb7605ca4f28c2e3 ] + +tun_napi_disable() 
and tun_napi_del() do not need +a pointer to the tun_struct + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -323,13 +323,13 @@ static void tun_napi_init(struct tun_str + } + } + +-static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile) ++static void tun_napi_disable(struct tun_file *tfile) + { + if (tfile->napi_enabled) + napi_disable(&tfile->napi); + } + +-static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile) ++static void tun_napi_del(struct tun_file *tfile) + { + if (tfile->napi_enabled) + netif_napi_del(&tfile->napi); +@@ -688,8 +688,8 @@ static void __tun_detach(struct tun_file + tun = rtnl_dereference(tfile->tun); + + if (tun && clean) { +- tun_napi_disable(tun, tfile); +- tun_napi_del(tun, tfile); ++ tun_napi_disable(tfile); ++ tun_napi_del(tfile); + } + + if (tun && !tfile->detached) { +@@ -756,7 +756,7 @@ static void tun_detach_all(struct net_de + for (i = 0; i < n; i++) { + tfile = rtnl_dereference(tun->tfiles[i]); + BUG_ON(!tfile); +- tun_napi_disable(tun, tfile); ++ tun_napi_disable(tfile); + tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; + tfile->socket.sk->sk_data_ready(tfile->socket.sk); + RCU_INIT_POINTER(tfile->tun, NULL); +@@ -772,7 +772,7 @@ static void tun_detach_all(struct net_de + synchronize_net(); + for (i = 0; i < n; i++) { + tfile = rtnl_dereference(tun->tfiles[i]); +- tun_napi_del(tun, tfile); ++ tun_napi_del(tfile); + /* Drop read queue */ + tun_queue_purge(tfile); + xdp_rxq_info_unreg(&tfile->xdp_rxq); diff --git a/queue-4.18/udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch b/queue-4.18/udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch new file mode 100644 index 00000000000..e3c3be3583e --- /dev/null +++ 
b/queue-4.18/udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Jiri Kosina +Date: Thu, 4 Oct 2018 13:37:32 +0200 +Subject: udp: Unbreak modules that rely on external __skb_recv_udp() availability + +From: Jiri Kosina + +[ Upstream commit 7e823644b60555f70f241274b8d0120dd919269a ] + +Commit 2276f58ac589 ("udp: use a separate rx queue for packet reception") +turned static inline __skb_recv_udp() from being a trivial helper around +__skb_recv_datagram() into a UDP specific implementation, making it +EXPORT_SYMBOL_GPL() at the same time. + +There are external modules that got broken by __skb_recv_udp() not being +visible to them. Let's unbreak them by making __skb_recv_udp EXPORT_SYMBOL(). + +Rationale (one of those) why this is actually "technically correct" thing +to do: __skb_recv_udp() used to be an inline wrapper around +__skb_recv_datagram(), which itself (still, and correctly so, I believe) +is EXPORT_SYMBOL(). + +Cc: Paolo Abeni +Cc: Eric Dumazet +Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception") +Signed-off-by: Jiri Kosina +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1631,7 +1631,7 @@ busy_check: + *err = error; + return NULL; + } +-EXPORT_SYMBOL_GPL(__skb_recv_udp); ++EXPORT_SYMBOL(__skb_recv_udp); + + /* + * This should be easy, if there is something there we diff --git a/queue-4.18/vxlan-fill-ttl-inherit-info.patch b/queue-4.18/vxlan-fill-ttl-inherit-info.patch new file mode 100644 index 00000000000..c3c6bb9c68d --- /dev/null +++ b/queue-4.18/vxlan-fill-ttl-inherit-info.patch @@ -0,0 +1,39 @@ +From foo@baz Tue Oct 16 07:06:17 CEST 2018 +From: Hangbin Liu +Date: Wed, 26 Sep 2018 10:35:42 +0800 +Subject: vxlan: fill ttl inherit info + +From: Hangbin Liu + +[ Upstream commit 8fd780698745ba121530c5c20fd237aacde4c371 ] + +When adding vxlan ttl inherit support, I forgot to fill it when dumping +vxlan info. Fix it now. + +Fixes: 72f6d71e491e6 ("vxlan: add ttl inherit support") +Signed-off-by: Hangbin Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -3537,6 +3537,7 @@ static size_t vxlan_get_size(const struc + nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ + nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ ++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ + nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ +@@ -3601,6 +3602,8 @@ static int vxlan_fill_info(struct sk_buf + } + + if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) || ++ nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT, ++ !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) || + nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || + nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) || + nla_put_u8(skb, IFLA_VXLAN_LEARNING,