From: Greg Kroah-Hartman Date: Tue, 16 Oct 2018 05:07:32 +0000 (+0200) Subject: 4.14-stable patches X-Git-Tag: v4.9.134~29 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=72f3d49d0289760c27f306a143a05fc199cd256e;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: bnxt_en-don-t-try-to-offload-vlan-modify-action.patch bnxt_en-fix-tx-timeout-during-netpoll.patch bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch bonding-avoid-possible-dead-lock.patch bonding-fix-warning-message.patch bonding-pass-link-local-packets-to-bonding-master-also.patch inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch ip6_tunnel-be-careful-when-accessing-the-inner-header.patch ip_tunnel-be-careful-when-accessing-the-inner-header.patch ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch net-aquantia-memory-corruption-on-jumbo-frames.patch net-dsa-bcm_sf2-call-setup-during-switch-resume.patch net-dsa-bcm_sf2-fix-unbind-ordering.patch net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch net-mvpp2-fix-a-txq_done-race-condition.patch net-packet-fix-packet-drop-as-of-virtio-gso.patch net-sched-add-policy-validation-for-tc-attributes.patch net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch net-systemport-fix-wake-up-interrupt-race-during-resume.patch net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch netlabel-check-for-ipv4mask-in-addrinfo_get.patch nfp-avoid-soft-lockups-under-control-message-storm.patch qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch 
qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch sctp-update-dst-pmtu-with-the-correct-daddr.patch tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch team-forbid-enslaving-team-device-to-itself.patch tipc-fix-flow-control-accounting-for-implicit-connect.patch udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch --- diff --git a/queue-4.14/bnxt_en-don-t-try-to-offload-vlan-modify-action.patch b/queue-4.14/bnxt_en-don-t-try-to-offload-vlan-modify-action.patch new file mode 100644 index 00000000000..533210a8b2c --- /dev/null +++ b/queue-4.14/bnxt_en-don-t-try-to-offload-vlan-modify-action.patch @@ -0,0 +1,63 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Davide Caratti +Date: Wed, 19 Sep 2018 19:01:37 +0200 +Subject: bnxt_en: don't try to offload VLAN 'modify' action + +From: Davide Caratti + +[ Upstream commit 8c6ec3613e7b0aade20a3196169c0bab32ed3e3f ] + +bnxt offload code currently supports only 'push' and 'pop' operation: let +.ndo_setup_tc() return -EOPNOTSUPP if VLAN 'modify' action is configured. + +Fixes: 2ae7408fedfe ("bnxt_en: bnxt: add TC flower filter offload support") +Signed-off-by: Davide Caratti +Acked-by: Sathya Perla +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +@@ -78,17 +78,23 @@ static int bnxt_tc_parse_redir(struct bn + return 0; + } + +-static void bnxt_tc_parse_vlan(struct bnxt *bp, +- struct bnxt_tc_actions *actions, +- const struct tc_action *tc_act) ++static int bnxt_tc_parse_vlan(struct bnxt *bp, ++ struct bnxt_tc_actions *actions, ++ const struct tc_action *tc_act) + { +- if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_POP) { ++ switch (tcf_vlan_action(tc_act)) { ++ case TCA_VLAN_ACT_POP: + actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN; +- } else if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_PUSH) { ++ break; ++ case TCA_VLAN_ACT_PUSH: + actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN; + actions->push_vlan_tci = htons(tcf_vlan_push_vid(tc_act)); + actions->push_vlan_tpid = tcf_vlan_push_proto(tc_act); ++ break; ++ default: ++ return -EOPNOTSUPP; + } ++ return 0; + } + + static int bnxt_tc_parse_actions(struct bnxt *bp, +@@ -122,7 +128,9 @@ static int bnxt_tc_parse_actions(struct + + /* Push/pop VLAN */ + if (is_tcf_vlan(tc_act)) { +- bnxt_tc_parse_vlan(bp, actions, tc_act); ++ rc = bnxt_tc_parse_vlan(bp, actions, tc_act); ++ if (rc) ++ return rc; + continue; + } + } diff --git a/queue-4.14/bnxt_en-fix-tx-timeout-during-netpoll.patch b/queue-4.14/bnxt_en-fix-tx-timeout-during-netpoll.patch new file mode 100644 index 00000000000..b9cff517d23 --- /dev/null +++ b/queue-4.14/bnxt_en-fix-tx-timeout-during-netpoll.patch @@ -0,0 +1,73 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Michael Chan +Date: Wed, 26 Sep 2018 00:41:04 -0400 +Subject: bnxt_en: Fix TX timeout during netpoll. 
+ +From: Michael Chan + +[ Upstream commit 73f21c653f930f438d53eed29b5e4c65c8a0f906 ] + +The current netpoll implementation in the bnxt_en driver has problems +that may miss TX completion events. bnxt_poll_work() in effect is +only handling at most 1 TX packet before exiting. In addition, +there may be in flight TX completions that ->poll() may miss even +after we fix bnxt_poll_work() to handle all visible TX completions. +netpoll may not call ->poll() again and HW may not generate IRQ +because the driver does not ARM the IRQ when the budget (0 for netpoll) +is reached. + +We fix it by handling all TX completions and to always ARM the IRQ +when we exit ->poll() with 0 budget. + +Also, the logic to ACK the completion ring in case it is almost filled +with TX completions need to be adjusted to take care of the 0 budget +case, as discussed with Eric Dumazet + +Reported-by: Song Liu +Reviewed-by: Song Liu +Tested-by: Song Liu +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -1864,8 +1864,11 @@ static int bnxt_poll_work(struct bnxt *b + if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { + tx_pkts++; + /* return full budget so NAPI will complete. 
*/ +- if (unlikely(tx_pkts > bp->tx_wake_thresh)) ++ if (unlikely(tx_pkts > bp->tx_wake_thresh)) { + rx_pkts = budget; ++ raw_cons = NEXT_RAW_CMP(raw_cons); ++ break; ++ } + } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { + if (likely(budget)) + rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); +@@ -1893,7 +1896,7 @@ static int bnxt_poll_work(struct bnxt *b + } + raw_cons = NEXT_RAW_CMP(raw_cons); + +- if (rx_pkts == budget) ++ if (rx_pkts && rx_pkts == budget) + break; + } + +@@ -2007,8 +2010,12 @@ static int bnxt_poll(struct napi_struct + while (1) { + work_done += bnxt_poll_work(bp, bnapi, budget - work_done); + +- if (work_done >= budget) ++ if (work_done >= budget) { ++ if (!budget) ++ BNXT_CP_DB_REARM(cpr->cp_doorbell, ++ cpr->cp_raw_cons); + break; ++ } + + if (!bnxt_has_work(bp, cpr)) { + if (napi_complete_done(napi, work_done)) diff --git a/queue-4.14/bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch b/queue-4.14/bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch new file mode 100644 index 00000000000..847183abbea --- /dev/null +++ b/queue-4.14/bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch @@ -0,0 +1,50 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Venkat Duvvuru +Date: Fri, 5 Oct 2018 00:26:02 -0400 +Subject: bnxt_en: free hwrm resources, if driver probe fails. + +From: Venkat Duvvuru + +[ Upstream commit a2bf74f4e1b82395dad2b08d2a911d9151db71c1 ] + +When the driver probe fails, all the resources that were allocated prior +to the failure must be freed. However, hwrm dma response memory is not +getting freed. + +This patch fixes the problem described above. + +Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") +Signed-off-by: Venkat Duvvuru +Signed-off-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -2964,10 +2964,11 @@ static void bnxt_free_hwrm_resources(str + { + struct pci_dev *pdev = bp->pdev; + +- dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, +- bp->hwrm_cmd_resp_dma_addr); +- +- bp->hwrm_cmd_resp_addr = NULL; ++ if (bp->hwrm_cmd_resp_addr) { ++ dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, ++ bp->hwrm_cmd_resp_dma_addr); ++ bp->hwrm_cmd_resp_addr = NULL; ++ } + if (bp->hwrm_dbg_resp_addr) { + dma_free_coherent(&pdev->dev, HWRM_DBG_REG_BUF_SIZE, + bp->hwrm_dbg_resp_addr, +@@ -8217,6 +8218,7 @@ init_err_cleanup_tc: + bnxt_clear_int_mode(bp); + + init_err_pci_clean: ++ bnxt_free_hwrm_resources(bp); + bnxt_cleanup_pci(bp); + + init_err_free: diff --git a/queue-4.14/bonding-avoid-possible-dead-lock.patch b/queue-4.14/bonding-avoid-possible-dead-lock.patch new file mode 100644 index 00000000000..b3a4aa08e99 --- /dev/null +++ b/queue-4.14/bonding-avoid-possible-dead-lock.patch @@ -0,0 +1,244 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Mahesh Bandewar +Date: Mon, 24 Sep 2018 14:40:11 -0700 +Subject: bonding: avoid possible dead-lock + +From: Mahesh Bandewar + +[ Upstream commit d4859d749aa7090ffb743d15648adb962a1baeae ] + +Syzkaller reported this on a slightly older kernel but it's still +applicable to the current kernel - + +====================================================== +WARNING: possible circular locking dependency detected +4.18.0-next-20180823+ #46 Not tainted +------------------------------------------------------ +syz-executor4/26841 is trying to acquire lock: +00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652 + +but task is already holding lock: +00000000768ab431 
(rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] +00000000768ab431 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4708 + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #2 (rtnl_mutex){+.+.}: + __mutex_lock_common kernel/locking/mutex.c:925 [inline] + __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073 + mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088 + rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77 + bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline] + bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320 + process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153 + worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 + kthread+0x35a/0x420 kernel/kthread.c:246 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 + +-> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}: + process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129 + worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 + kthread+0x35a/0x420 kernel/kthread.c:246 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 + +-> #0 ((wq_completion)bond_dev->name){+.+.}: + lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 + flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 + drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 + destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 + __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 + bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 + register_netdevice+0x337/0x1100 net/core/dev.c:8410 + bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 + rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 + rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 + netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 + rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 + netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] + netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 + 
netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 + sock_sendmsg_nosec net/socket.c:622 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:632 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 + __sys_sendmsg+0x11d/0x290 net/socket.c:2153 + __do_sys_sendmsg net/socket.c:2162 [inline] + __se_sys_sendmsg net/socket.c:2160 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +other info that might help us debug this: + +Chain exists of: + (wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(rtnl_mutex); + lock((work_completion)(&(&nnw->work)->work)); + lock(rtnl_mutex); + lock((wq_completion)bond_dev->name); + + *** DEADLOCK *** + +1 lock held by syz-executor4/26841: + +stack backtrace: +CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 + print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222 + check_prev_add kernel/locking/lockdep.c:1862 [inline] + check_prevs_add kernel/locking/lockdep.c:1975 [inline] + validate_chain kernel/locking/lockdep.c:2416 [inline] + __lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412 + lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 + flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 + drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 + destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 + __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 + bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 + register_netdevice+0x337/0x1100 net/core/dev.c:8410 + bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 + rtnl_newlink+0xef4/0x1d50 
net/core/rtnetlink.c:3099 + rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 + netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 + rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 + netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] + netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 + netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 + sock_sendmsg_nosec net/socket.c:622 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:632 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 + __sys_sendmsg+0x11d/0x290 net/socket.c:2153 + __do_sys_sendmsg net/socket.c:2162 [inline] + __se_sys_sendmsg net/socket.c:2160 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x457089 +Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089 +RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 +RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001 + +Signed-off-by: Mahesh Bandewar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 43 +++++++++++++++------------------------- + include/net/bonding.h | 7 ------ + 2 files changed, 18 insertions(+), 32 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -210,6 +210,7 @@ static void bond_get_stats(struct net_de + static void bond_slave_arr_handler(struct work_struct *work); + static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, + int mod); ++static void bond_netdev_notify_work(struct work_struct *work); + + /*---------------------------- General routines -----------------------------*/ + +@@ -1254,6 +1255,8 @@ static struct slave *bond_alloc_slave(st + return NULL; + } + } ++ INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); ++ + return slave; + } + +@@ -1261,6 +1264,7 @@ static void bond_free_slave(struct slave + { + struct bonding *bond = bond_get_bond_by_slave(slave); + ++ cancel_delayed_work_sync(&slave->notify_work); + if (BOND_MODE(bond) == BOND_MODE_8023AD) + kfree(SLAVE_AD_INFO(slave)); + +@@ -1282,39 +1286,26 @@ static void bond_fill_ifslave(struct sla + info->link_failure_count = slave->link_failure_count; + } + +-static void bond_netdev_notify(struct net_device *dev, +- struct netdev_bonding_info *info) +-{ +- rtnl_lock(); +- netdev_bonding_info_change(dev, info); +- rtnl_unlock(); +-} +- + static void bond_netdev_notify_work(struct work_struct *_work) + { +- struct netdev_notify_work *w = +- container_of(_work, struct netdev_notify_work, work.work); ++ struct slave *slave = container_of(_work, struct slave, ++ notify_work.work); ++ ++ if (rtnl_trylock()) { ++ struct netdev_bonding_info binfo; + +- bond_netdev_notify(w->dev, &w->bonding_info); +- dev_put(w->dev); +- kfree(w); ++ bond_fill_ifslave(slave, &binfo.slave); ++ bond_fill_ifbond(slave->bond, &binfo.master); ++ netdev_bonding_info_change(slave->dev, &binfo); ++ rtnl_unlock(); ++ } else { ++ 
queue_delayed_work(slave->bond->wq, &slave->notify_work, 1); ++ } + } + + void bond_queue_slave_event(struct slave *slave) + { +- struct bonding *bond = slave->bond; +- struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); +- +- if (!nnw) +- return; +- +- dev_hold(slave->dev); +- nnw->dev = slave->dev; +- bond_fill_ifslave(slave, &nnw->bonding_info.slave); +- bond_fill_ifbond(bond, &nnw->bonding_info.master); +- INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); +- +- queue_delayed_work(slave->bond->wq, &nnw->work, 0); ++ queue_delayed_work(slave->bond->wq, &slave->notify_work, 0); + } + + void bond_lower_state_changed(struct slave *slave) +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -139,12 +139,6 @@ struct bond_parm_tbl { + int mode; + }; + +-struct netdev_notify_work { +- struct delayed_work work; +- struct net_device *dev; +- struct netdev_bonding_info bonding_info; +-}; +- + struct slave { + struct net_device *dev; /* first - useful for panic debug */ + struct bonding *bond; /* our master */ +@@ -172,6 +166,7 @@ struct slave { + #ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *np; + #endif ++ struct delayed_work notify_work; + struct kobject kobj; + struct rtnl_link_stats64 slave_stats; + }; diff --git a/queue-4.14/bonding-fix-warning-message.patch b/queue-4.14/bonding-fix-warning-message.patch new file mode 100644 index 00000000000..170a6d694ae --- /dev/null +++ b/queue-4.14/bonding-fix-warning-message.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Mahesh Bandewar +Date: Tue, 2 Oct 2018 12:14:34 -0700 +Subject: bonding: fix warning message + +From: Mahesh Bandewar + +[ Upstream commit 0f3b914c9cfcd7bbedd445dc4ac5dd999fa213c2 ] + +RX queue config for bonding master could be different from its slave +device(s). With the commit 6a9e461f6fe4 ("bonding: pass link-local +packets to bonding master also."), the packet is reinjected into stack +with skb->dev as bonding master. 
This potentially triggers the +message: + + "bondX received packet on queue Y, but number of RX queues is Z" + +whenever the queue that packet is received on is higher than the +numrxqueues on bonding master (Y > Z). + +Fixes: 6a9e461f6fe4 ("bonding: pass link-local packets to bonding master also.") +Reported-by: John Sperbeck +Signed-off-by: Eric Dumazet +Signed-off-by: Mahesh Bandewar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1193,6 +1193,7 @@ static rx_handler_result_t bond_handle_f + + if (nskb) { + nskb->dev = bond->dev; ++ nskb->queue_mapping = 0; + netif_rx(nskb); + } + return RX_HANDLER_PASS; diff --git a/queue-4.14/bonding-pass-link-local-packets-to-bonding-master-also.patch b/queue-4.14/bonding-pass-link-local-packets-to-bonding-master-also.patch new file mode 100644 index 00000000000..ba24db814e9 --- /dev/null +++ b/queue-4.14/bonding-pass-link-local-packets-to-bonding-master-also.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Mahesh Bandewar +Date: Mon, 24 Sep 2018 14:39:42 -0700 +Subject: bonding: pass link-local packets to bonding master also. + +From: Mahesh Bandewar + +[ Upstream commit 6a9e461f6fe4434e6172304b69774daff9a3ac4c ] + +Commit b89f04c61efe ("bonding: deliver link-local packets with +skb->dev set to link that packets arrived on") changed the behavior +of how link-local-multicast packets are processed. The change in +the behavior broke some legacy use cases where these packets are +expected to arrive on bonding master device also. + +This patch passes the packet to the stack with the link it arrived +on as well as passes to the bonding-master device to preserve the +legacy use case. 
+ +Fixes: b89f04c61efe ("bonding: deliver link-local packets with skb->dev set to link that packets arrived on") +Reported-by: Michal Soltys +Signed-off-by: Mahesh Bandewar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1177,9 +1177,26 @@ static rx_handler_result_t bond_handle_f + } + } + +- /* don't change skb->dev for link-local packets */ +- if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) ++ /* Link-local multicast packets should be passed to the ++ * stack on the link they arrive as well as pass them to the ++ * bond-master device. These packets are mostly usable when ++ * stack receives it with the link on which they arrive ++ * (e.g. LLDP) they also must be available on master. Some of ++ * the use cases include (but are not limited to): LLDP agents ++ * that must be able to operate both on enslaved interfaces as ++ * well as on bonds themselves; linux bridges that must be able ++ * to process/pass BPDUs from attached bonds when any kind of ++ * STP version is enabled on the network. 
++ */ ++ if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) { ++ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); ++ ++ if (nskb) { ++ nskb->dev = bond->dev; ++ netif_rx(nskb); ++ } + return RX_HANDLER_PASS; ++ } + if (bond_should_deliver_exact_match(skb, slave, bond)) + return RX_HANDLER_EXACT; + diff --git a/queue-4.14/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch b/queue-4.14/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch new file mode 100644 index 00000000000..4c1b8750e0f --- /dev/null +++ b/queue-4.14/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch @@ -0,0 +1,101 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Eric Dumazet +Date: Tue, 2 Oct 2018 12:35:05 -0700 +Subject: inet: make sure to grab rcu_read_lock before using ireq->ireq_opt + +From: Eric Dumazet + +[ Upstream commit 2ab2ddd301a22ca3c5f0b743593e4ad2953dfa53 ] + +Timer handlers do not imply rcu_read_lock(), so my recent fix +triggered a LOCKDEP warning when SYNACK is retransmit. + +Lets add rcu_read_lock()/rcu_read_unlock() pairs around ireq->ireq_opt +usages instead of guessing what is done by callers, since it is +not worth the pain. + +Get rid of ireq_opt_deref() helper since it hides the logic +without real benefit, since it is now a standard rcu_dereference(). + +Fixes: 1ad98e9d1bdf ("tcp/dccp: fix lockdep issue when SYN is backlogged") +Signed-off-by: Eric Dumazet +Reported-by: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 5 ----- + net/dccp/ipv4.c | 4 +++- + net/ipv4/inet_connection_sock.c | 5 ++++- + net/ipv4/tcp_ipv4.c | 4 +++- + 4 files changed, 10 insertions(+), 8 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -129,11 +129,6 @@ static inline int inet_request_bound_dev + return sk->sk_bound_dev_if; + } + +-static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) +-{ +- return rcu_dereference(ireq->ireq_opt); +-} +- + struct inet_cork { + unsigned int flags; + __be32 addr; +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -493,9 +493,11 @@ static int dccp_v4_send_response(const s + + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, + ireq->ir_rmt_addr); ++ rcu_read_lock(); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq_opt_deref(ireq)); ++ rcu_dereference(ireq->ireq_opt)); ++ rcu_read_unlock(); + err = net_xmit_eval(err); + } + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -542,7 +542,8 @@ struct dst_entry *inet_csk_route_req(con + struct ip_options_rcu *opt; + struct rtable *rt; + +- opt = ireq_opt_deref(ireq); ++ rcu_read_lock(); ++ opt = rcu_dereference(ireq->ireq_opt); + + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, +@@ -556,11 +557,13 @@ struct dst_entry *inet_csk_route_req(con + goto no_route; + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) + goto route_err; ++ rcu_read_unlock(); + return &rt->dst; + + route_err: + ip_rt_put(rt); + no_route: ++ rcu_read_unlock(); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); + return NULL; + } +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -875,9 +875,11 @@ static int tcp_v4_send_synack(const stru + if (skb) { + __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + ++ rcu_read_lock(); + err = ip_build_and_send_pkt(skb, sk, 
ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq_opt_deref(ireq)); ++ rcu_dereference(ireq->ireq_opt)); ++ rcu_read_unlock(); + err = net_xmit_eval(err); + } + diff --git a/queue-4.14/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch b/queue-4.14/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch new file mode 100644 index 00000000000..9fb1c25d397 --- /dev/null +++ b/queue-4.14/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch @@ -0,0 +1,136 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Paolo Abeni +Date: Wed, 19 Sep 2018 15:02:07 +0200 +Subject: ip6_tunnel: be careful when accessing the inner header + +From: Paolo Abeni + +[ Upstream commit 76c0ddd8c3a683f6e2c6e60e11dc1a1558caf4bc ] + +the ip6 tunnel xmit ndo assumes that the processed skb always +contains an ip[v6] header, but syzbot has found a way to send +frames that fall short of this assumption, leading to the following splat: + +BUG: KMSAN: uninit-value in ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 +[inline] +BUG: KMSAN: uninit-value in ip6_tnl_start_xmit+0x7d2/0x1ef0 +net/ipv6/ip6_tunnel.c:1390 +CPU: 0 PID: 4504 Comm: syz-executor558 Not tainted 4.16.0+ #87 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:17 [inline] + dump_stack+0x185/0x1d0 lib/dump_stack.c:53 + kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 + __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 + ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] + ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 + __netdev_start_xmit include/linux/netdevice.h:4066 [inline] + netdev_start_xmit include/linux/netdevice.h:4075 [inline] + xmit_one net/core/dev.c:3026 [inline] + dev_hard_start_xmit+0x5f1/0xc70 net/core/dev.c:3042 + __dev_queue_xmit+0x27ee/0x3520 net/core/dev.c:3557 + dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590 + packet_snd net/packet/af_packet.c:2944 [inline] + packet_sendmsg+0x7c70/0x8a30 
net/packet/af_packet.c:2969 + sock_sendmsg_nosec net/socket.c:630 [inline] + sock_sendmsg net/socket.c:640 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 + __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 + SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 + SyS_sendmmsg+0x63/0x90 net/socket.c:2162 + do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 +RIP: 0033:0x441819 +RSP: 002b:00007ffe58ee8268 EFLAGS: 00000213 ORIG_RAX: 0000000000000133 +RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000441819 +RDX: 0000000000000002 RSI: 0000000020000100 RDI: 0000000000000003 +RBP: 00000000006cd018 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000402510 +R13: 00000000004025a0 R14: 0000000000000000 R15: 0000000000000000 + +Uninit was created at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] + kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 + kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 + kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 + slab_post_alloc_hook mm/slab.h:445 [inline] + slab_alloc_node mm/slub.c:2737 [inline] + __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 + __kmalloc_reserve net/core/skbuff.c:138 [inline] + __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 + alloc_skb include/linux/skbuff.h:984 [inline] + alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234 + sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085 + packet_alloc_skb net/packet/af_packet.c:2803 [inline] + packet_snd net/packet/af_packet.c:2894 [inline] + packet_sendmsg+0x6454/0x8a30 net/packet/af_packet.c:2969 + sock_sendmsg_nosec net/socket.c:630 [inline] + sock_sendmsg net/socket.c:640 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 + __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 + SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 + SyS_sendmmsg+0x63/0x90 net/socket.c:2162 + do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 + 
entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + +This change addresses the issue adding the needed check before +accessing the inner header. + +The ipv4 side of the issue is apparently there since the ipv4 over ipv6 +initial support, and the ipv6 side predates git history. + +Fixes: c4d3efafcc93 ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.") +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzbot+3fde91d4d394747d6db4@syzkaller.appspotmail.com +Tested-by: Alexander Potapenko +Signed-off-by: Paolo Abeni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1227,7 +1227,7 @@ static inline int + ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct ip6_tnl *t = netdev_priv(dev); +- const struct iphdr *iph = ip_hdr(skb); ++ const struct iphdr *iph; + int encap_limit = -1; + struct flowi6 fl6; + __u8 dsfield; +@@ -1235,6 +1235,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + u8 tproto; + int err; + ++ /* ensure we can access the full inner ip header */ ++ if (!pskb_may_pull(skb, sizeof(struct iphdr))) ++ return -1; ++ ++ iph = ip_hdr(skb); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + tproto = ACCESS_ONCE(t->parms.proto); +@@ -1298,7 +1303,7 @@ static inline int + ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct ip6_tnl *t = netdev_priv(dev); +- struct ipv6hdr *ipv6h = ipv6_hdr(skb); ++ struct ipv6hdr *ipv6h; + int encap_limit = -1; + __u16 offset; + struct flowi6 fl6; +@@ -1307,6 +1312,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + u8 tproto; + int err; + ++ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) ++ return -1; ++ ++ ipv6h = ipv6_hdr(skb); + tproto = ACCESS_ONCE(t->parms.proto); + if ((tproto != IPPROTO_IPV6 && tproto != 0) || + ip6_tnl_addr_conflict(t, ipv6h)) diff --git 
a/queue-4.14/ip_tunnel-be-careful-when-accessing-the-inner-header.patch b/queue-4.14/ip_tunnel-be-careful-when-accessing-the-inner-header.patch new file mode 100644 index 00000000000..be7b61f8b10 --- /dev/null +++ b/queue-4.14/ip_tunnel-be-careful-when-accessing-the-inner-header.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Paolo Abeni +Date: Mon, 24 Sep 2018 15:48:19 +0200 +Subject: ip_tunnel: be careful when accessing the inner header + +From: Paolo Abeni + +[ Upstream commit ccfec9e5cb2d48df5a955b7bf47f7782157d3bc2] + +Cong noted that we need the same checks introduced by commit 76c0ddd8c3a6 +("ip6_tunnel: be careful when accessing the inner header") +even for ipv4 tunnels. + +Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") +Suggested-by: Cong Wang +Signed-off-by: Paolo Abeni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -635,6 +635,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, + const struct iphdr *tnl_params, u8 protocol) + { + struct ip_tunnel *tunnel = netdev_priv(dev); ++ unsigned int inner_nhdr_len = 0; + const struct iphdr *inner_iph; + struct flowi4 fl4; + u8 tos, ttl; +@@ -644,6 +645,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, + __be32 dst; + bool connected; + ++ /* ensure we can access the inner net header, for several users below */ ++ if (skb->protocol == htons(ETH_P_IP)) ++ inner_nhdr_len = sizeof(struct iphdr); ++ else if (skb->protocol == htons(ETH_P_IPV6)) ++ inner_nhdr_len = sizeof(struct ipv6hdr); ++ if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) ++ goto tx_error; ++ + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + connected = (tunnel->parms.iph.daddr != 0); + diff --git a/queue-4.14/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch b/queue-4.14/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch new file mode 100644 
index 00000000000..cc8a7c12f78 --- /dev/null +++ b/queue-4.14/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch @@ -0,0 +1,42 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Eric Dumazet +Date: Sun, 30 Sep 2018 11:33:39 -0700 +Subject: ipv4: fix use-after-free in ip_cmsg_recv_dstaddr() + +From: Eric Dumazet + +[ Upstream commit 64199fc0a46ba211362472f7f942f900af9492fd ] + +Caching ip_hdr(skb) before a call to pskb_may_pull() is buggy, +do not do it. + +Fixes: 2efd4fca703a ("ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Reported-by: syzbot +Acked-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_sockglue.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -147,7 +147,6 @@ static void ip_cmsg_recv_security(struct + static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) + { + struct sockaddr_in sin; +- const struct iphdr *iph = ip_hdr(skb); + __be16 *ports; + int end; + +@@ -162,7 +161,7 @@ static void ip_cmsg_recv_dstaddr(struct + ports = (__be16 *)skb_transport_header(skb); + + sin.sin_family = AF_INET; +- sin.sin_addr.s_addr = iph->daddr; ++ sin.sin_addr.s_addr = ip_hdr(skb)->daddr; + sin.sin_port = ports[1]; + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + diff --git a/queue-4.14/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch b/queue-4.14/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch new file mode 100644 index 00000000000..ef52ac22eed --- /dev/null +++ b/queue-4.14/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch @@ -0,0 +1,169 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Wei Wang +Date: Thu, 4 Oct 2018 10:12:37 -0700 +Subject: ipv6: take rcu lock in rawv6_send_hdrinc() + +From: Wei Wang + +[ Upstream commit a688caa34beb2fd2a92f1b6d33e40cde433ba160 ] + +In rawv6_send_hdrinc(), in order to avoid an extra dst_hold(), we +directly assign the dst 
to skb and set passed in dst to NULL to avoid +double free. +However, in error case, we free skb and then do stats update with the +dst pointer passed in. This causes use-after-free on the dst. +Fix it by taking rcu read lock right before dst could get released to +make sure dst does not get freed until the stats update is done. +Note: we don't have this issue in ipv4 cause dst is not used for stats +update in v4. + +Syzkaller reported following crash: +BUG: KASAN: use-after-free in rawv6_send_hdrinc net/ipv6/raw.c:692 [inline] +BUG: KASAN: use-after-free in rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921 +Read of size 8 at addr ffff8801d95ba730 by task syz-executor0/32088 + +CPU: 1 PID: 32088 Comm: syz-executor0 Not tainted 4.19.0-rc2+ #93 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 + print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 + rawv6_send_hdrinc net/ipv6/raw.c:692 [inline] + rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921 + inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:631 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 + __sys_sendmsg+0x11d/0x280 net/socket.c:2152 + __do_sys_sendmsg net/socket.c:2161 [inline] + __se_sys_sendmsg net/socket.c:2159 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x457099 +Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f83756edc78 
EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f83756ee6d4 RCX: 0000000000457099 +RDX: 0000000000000000 RSI: 0000000020003840 RDI: 0000000000000004 +RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 00000000004d4b30 R14: 00000000004c90b1 R15: 0000000000000000 + +Allocated by task 32088: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 + kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 + kmem_cache_alloc+0x12e/0x730 mm/slab.c:3554 + dst_alloc+0xbb/0x1d0 net/core/dst.c:105 + ip6_dst_alloc+0x35/0xa0 net/ipv6/route.c:353 + ip6_rt_cache_alloc+0x247/0x7b0 net/ipv6/route.c:1186 + ip6_pol_route+0x8f8/0xd90 net/ipv6/route.c:1895 + ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2093 + fib6_rule_lookup+0x277/0x860 net/ipv6/fib6_rules.c:122 + ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2121 + ip6_route_output include/net/ip6_route.h:88 [inline] + ip6_dst_lookup_tail+0xe27/0x1d60 net/ipv6/ip6_output.c:951 + ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079 + rawv6_sendmsg+0x12d9/0x4630 net/ipv6/raw.c:905 + inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:631 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 + __sys_sendmsg+0x11d/0x280 net/socket.c:2152 + __do_sys_sendmsg net/socket.c:2161 [inline] + __se_sys_sendmsg net/socket.c:2159 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Freed by task 5356: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 + kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 + __cache_free mm/slab.c:3498 [inline] + kmem_cache_free+0x83/0x290 mm/slab.c:3756 + 
dst_destroy+0x267/0x3c0 net/core/dst.c:141 + dst_destroy_rcu+0x16/0x19 net/core/dst.c:154 + __rcu_reclaim kernel/rcu/rcu.h:236 [inline] + rcu_do_batch kernel/rcu/tree.c:2576 [inline] + invoke_rcu_callbacks kernel/rcu/tree.c:2880 [inline] + __rcu_process_callbacks kernel/rcu/tree.c:2847 [inline] + rcu_process_callbacks+0xf23/0x2670 kernel/rcu/tree.c:2864 + __do_softirq+0x30b/0xad8 kernel/softirq.c:292 + +Fixes: 1789a640f556 ("raw: avoid two atomics in xmit") +Signed-off-by: Wei Wang +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/raw.c | 29 ++++++++++++++++++++--------- + 1 file changed, 20 insertions(+), 9 deletions(-) + +--- a/net/ipv6/raw.c ++++ b/net/ipv6/raw.c +@@ -650,8 +650,6 @@ static int rawv6_send_hdrinc(struct sock + skb->protocol = htons(ETH_P_IPV6); + skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; +- skb_dst_set(skb, &rt->dst); +- *dstp = NULL; + + skb_put(skb, length); + skb_reset_network_header(skb); +@@ -664,8 +662,14 @@ static int rawv6_send_hdrinc(struct sock + + skb->transport_header = skb->network_header; + err = memcpy_from_msg(iph, msg, length); +- if (err) +- goto error_fault; ++ if (err) { ++ err = -EFAULT; ++ kfree_skb(skb); ++ goto error; ++ } ++ ++ skb_dst_set(skb, &rt->dst); ++ *dstp = NULL; + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing +@@ -674,21 +678,28 @@ static int rawv6_send_hdrinc(struct sock + if (unlikely(!skb)) + return 0; + ++ /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev ++ * in the error path. Since skb has been freed, the dst could ++ * have been queued for deletion. 
++ */ ++ rcu_read_lock(); + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, rt->dst.dev, dst_output); + if (err > 0) + err = net_xmit_errno(err); +- if (err) +- goto error; ++ if (err) { ++ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); ++ rcu_read_unlock(); ++ goto error_check; ++ } ++ rcu_read_unlock(); + out: + return 0; + +-error_fault: +- err = -EFAULT; +- kfree_skb(skb); + error: + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); ++error_check: + if (err == -ENOBUFS && !np->recverr) + err = 0; + return err; diff --git a/queue-4.14/net-aquantia-memory-corruption-on-jumbo-frames.patch b/queue-4.14/net-aquantia-memory-corruption-on-jumbo-frames.patch new file mode 100644 index 00000000000..d901f6ee52e --- /dev/null +++ b/queue-4.14/net-aquantia-memory-corruption-on-jumbo-frames.patch @@ -0,0 +1,89 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Friedemann Gerold +Date: Sat, 15 Sep 2018 18:03:39 +0300 +Subject: net: aquantia: memory corruption on jumbo frames + +From: Friedemann Gerold + +[ Upstream commit d26ed6b0e5e23190d43ab34bc69cbecdc464a2cf ] + +This patch fixes skb_shared area, which will be corrupted +upon reception of 4K jumbo packets. + +Originally build_skb usage purpose was to reuse page for skb to eliminate +needs of extra fragments. But that logic does not take into account that +skb_shared_info should be reserved at the end of skb data area. + +In case packet data consumes all the page (4K), skb_shinfo location +overflows the page. As a consequence, __build_skb zeroed shinfo data above +the allocated page, corrupting next page. + +The issue is rarely seen in real life because jumbo are normally larger +than 4K and that causes another code path to trigger. 
+But it 100% reproducible with simple scapy packet, like: + + sendp(IP(dst="192.168.100.3") / TCP(dport=443) \ + / Raw(RandString(size=(4096-40))), iface="enp1s0") + +Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") + +Reported-by: Friedemann Gerold +Reported-by: Michael Rauch +Signed-off-by: Friedemann Gerold +Tested-by: Nikita Danilov +Signed-off-by: Igor Russkikh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 32 ++++++++++++----------- + 1 file changed, 18 insertions(+), 14 deletions(-) + +--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c ++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +@@ -222,9 +222,10 @@ int aq_ring_rx_clean(struct aq_ring_s *s + } + + /* for single fragment packets use build_skb() */ +- if (buff->is_eop) { ++ if (buff->is_eop && ++ buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) { + skb = build_skb(page_address(buff->page), +- buff->len + AQ_SKB_ALIGN); ++ AQ_CFG_RX_FRAME_MAX); + if (unlikely(!skb)) { + err = -ENOMEM; + goto err_exit; +@@ -244,18 +245,21 @@ int aq_ring_rx_clean(struct aq_ring_s *s + buff->len - ETH_HLEN, + SKB_TRUESIZE(buff->len - ETH_HLEN)); + +- for (i = 1U, next_ = buff->next, +- buff_ = &self->buff_ring[next_]; true; +- next_ = buff_->next, +- buff_ = &self->buff_ring[next_], ++i) { +- skb_add_rx_frag(skb, i, buff_->page, 0, +- buff_->len, +- SKB_TRUESIZE(buff->len - +- ETH_HLEN)); +- buff_->is_cleaned = 1; +- +- if (buff_->is_eop) +- break; ++ if (!buff->is_eop) { ++ for (i = 1U, next_ = buff->next, ++ buff_ = &self->buff_ring[next_]; ++ true; next_ = buff_->next, ++ buff_ = &self->buff_ring[next_], ++i) { ++ skb_add_rx_frag(skb, i, ++ buff_->page, 0, ++ buff_->len, ++ SKB_TRUESIZE(buff->len - ++ ETH_HLEN)); ++ buff_->is_cleaned = 1; ++ ++ if (buff_->is_eop) ++ break; ++ } + } + } + diff --git a/queue-4.14/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch 
b/queue-4.14/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch new file mode 100644 index 00000000000..a68d8a24383 --- /dev/null +++ b/queue-4.14/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Florian Fainelli +Date: Tue, 9 Oct 2018 16:48:58 -0700 +Subject: net: dsa: bcm_sf2: Call setup during switch resume + +From: Florian Fainelli + +[ Upstream commit 54baca096386d862d19c10f58f34bf787c6b3cbe ] + +There is no reason to open code what the switch setup function does, in +fact, because we just issued a switch reset, we would make all the +register get their default values, including for instance, having unused +port be enabled again and wasting power and leading to an inappropriate +switch core clock being selected. + +Fixes: 8cfa94984c9c ("net: dsa: bcm_sf2: add suspend/resume callbacks") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -772,7 +772,6 @@ static int bcm_sf2_sw_suspend(struct dsa + static int bcm_sf2_sw_resume(struct dsa_switch *ds) + { + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); +- unsigned int port; + int ret; + + ret = bcm_sf2_sw_rst(priv); +@@ -784,12 +783,7 @@ static int bcm_sf2_sw_resume(struct dsa_ + if (priv->hw_params.num_gphy == 1) + bcm_sf2_gphy_enable_set(ds, true); + +- for (port = 0; port < DSA_MAX_PORTS; port++) { +- if ((1 << port) & ds->enabled_port_mask) +- bcm_sf2_port_setup(ds, port, NULL); +- else if (dsa_is_cpu_port(ds, port)) +- bcm_sf2_imp_setup(ds, port); +- } ++ ds->ops->setup(ds); + + return 0; + } diff --git a/queue-4.14/net-dsa-bcm_sf2-fix-unbind-ordering.patch b/queue-4.14/net-dsa-bcm_sf2-fix-unbind-ordering.patch new file mode 100644 index 00000000000..6585fff94e6 --- /dev/null +++ 
b/queue-4.14/net-dsa-bcm_sf2-fix-unbind-ordering.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Florian Fainelli +Date: Tue, 9 Oct 2018 16:48:57 -0700 +Subject: net: dsa: bcm_sf2: Fix unbind ordering + +From: Florian Fainelli + +[ Upstream commit bf3b452b7af787b8bf27de6490dc4eedf6f97599 ] + +The order in which we release resources is unfortunately leading to bus +errors while dismantling the port. This is because we set +priv->wol_ports_mask to 0 to tell bcm_sf2_sw_suspend() that it is now +permissible to clock gate the switch. Later on, when dsa_slave_destroy() +comes in from dsa_unregister_switch() and calls +dsa_switch_ops::port_disable, we perform the same dismantling again, and +this time we hit registers that are clock gated. + +Make sure that dsa_unregister_switch() is the first thing that happens, +which takes care of releasing all user visible resources, then proceed +with clock gating hardware. We still need to set priv->wol_ports_mask to +0 to make sure that an enabled port properly gets disabled in case it +was previously used as part of Wake-on-LAN. + +Fixes: d9338023fb8e ("net: dsa: bcm_sf2: Make it a real platform device driver") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -1264,10 +1264,10 @@ static int bcm_sf2_sw_remove(struct plat + { + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + +- /* Disable all ports and interrupts */ + priv->wol_ports_mask = 0; +- bcm_sf2_sw_suspend(priv->dev->ds); + dsa_unregister_switch(priv->dev->ds); ++ /* Disable all ports and interrupts */ ++ bcm_sf2_sw_suspend(priv->dev->ds); + bcm_sf2_mdio_unregister(priv); + + return 0; diff --git a/queue-4.14/net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch b/queue-4.14/net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch new file mode 100644 index 00000000000..2aa23dfe487 --- /dev/null +++ b/queue-4.14/net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: "Maciej Żenczykowski" +Date: Sat, 22 Sep 2018 01:34:01 -0700 +Subject: net-ethtool: ETHTOOL_GUFO did not and should not require CAP_NET_ADMIN + +From: "Maciej Żenczykowski" + +[ Upstream commit 474ff2600889e16280dbc6ada8bfecb216169a70 ] + +So it should not fail with EPERM even though it is no longer implemented... + +This is a fix for: + (userns)$ egrep ^Cap /proc/self/status + CapInh: 0000003fffffffff + CapPrm: 0000003fffffffff + CapEff: 0000003fffffffff + CapBnd: 0000003fffffffff + CapAmb: 0000003fffffffff + + (userns)$ tcpdump -i usb_rndis0 + tcpdump: WARNING: usb_rndis0: SIOCETHTOOL(ETHTOOL_GUFO) ioctl failed: Operation not permitted + Warning: Kernel filter failed: Bad file descriptor + tcpdump: can't remove kernel filter: Bad file descriptor + +With this change it returns EOPNOTSUPP instead of EPERM. + +See also https://github.com/the-tcpdump-group/libpcap/issues/689 + +Fixes: 08a00fea6de2 "net: Remove references to NETIF_F_UFO from ethtool." +Cc: David S. 
Miller +Signed-off-by: Maciej Żenczykowski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/ethtool.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/ethtool.c ++++ b/net/core/ethtool.c +@@ -2572,6 +2572,7 @@ int dev_ethtool(struct net *net, struct + case ETHTOOL_GPHYSTATS: + case ETHTOOL_GTSO: + case ETHTOOL_GPERMADDR: ++ case ETHTOOL_GUFO: + case ETHTOOL_GGSO: + case ETHTOOL_GGRO: + case ETHTOOL_GFLAGS: diff --git a/queue-4.14/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch b/queue-4.14/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch new file mode 100644 index 00000000000..9f006236a9e --- /dev/null +++ b/queue-4.14/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch @@ -0,0 +1,102 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Yunsheng Lin +Date: Tue, 25 Sep 2018 10:21:55 +0100 +Subject: net: hns: fix for unmapping problem when SMMU is on + +From: Yunsheng Lin + +[ Upstream commit 2e9361efa707e186d91b938e44f9e326725259f7 ] + +If SMMU is on, it is more likely that skb_shinfo(skb)->frags[i] +cannot be sent by a single BD. When this happens, the +hns_nic_net_xmit_hw function maps the whole data in a frag using +skb_frag_dma_map, but unmaps each BD's data individually when tx is +done, which causes problems when SMMU is on. + +This patch fixes this problem by unmapping the whole data in a +frag when tx is done. + +Signed-off-by: Yunsheng Lin +Signed-off-by: Peng Li +Reviewed-by: Yisen Zhuang +Signed-off-by: Salil Mehta +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/hisilicon/hns/hnae.c | 2 - + drivers/net/ethernet/hisilicon/hns/hns_enet.c | 30 ++++++++++++++++---------- + 2 files changed, 20 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/hisilicon/hns/hnae.c ++++ b/drivers/net/ethernet/hisilicon/hns/hnae.c +@@ -84,7 +84,7 @@ static void hnae_unmap_buffer(struct hna + if (cb->type == DESC_TYPE_SKB) + dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, + ring_to_dma_dir(ring)); +- else ++ else if (cb->length) + dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, + ring_to_dma_dir(ring)); + } +--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c +@@ -40,9 +40,9 @@ + #define SKB_TMP_LEN(SKB) \ + (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB)) + +-static void fill_v2_desc(struct hnae_ring *ring, void *priv, +- int size, dma_addr_t dma, int frag_end, +- int buf_num, enum hns_desc_type type, int mtu) ++static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size, ++ int send_sz, dma_addr_t dma, int frag_end, ++ int buf_num, enum hns_desc_type type, int mtu) + { + struct hnae_desc *desc = &ring->desc[ring->next_to_use]; + struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; +@@ -64,7 +64,7 @@ static void fill_v2_desc(struct hnae_rin + desc_cb->type = type; + + desc->addr = cpu_to_le64(dma); +- desc->tx.send_size = cpu_to_le16((u16)size); ++ desc->tx.send_size = cpu_to_le16((u16)send_sz); + + /* config bd buffer end */ + hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); +@@ -133,6 +133,14 @@ static void fill_v2_desc(struct hnae_rin + ring_ptr_move_fw(ring, next_to_use); + } + ++static void fill_v2_desc(struct hnae_ring *ring, void *priv, ++ int size, dma_addr_t dma, int frag_end, ++ int buf_num, enum hns_desc_type type, int mtu) ++{ ++ fill_v2_desc_hw(ring, priv, size, size, dma, frag_end, ++ buf_num, type, mtu); ++} ++ + static const struct acpi_device_id 
hns_enet_acpi_match[] = { + { "HISI00C1", 0 }, + { "HISI00C2", 0 }, +@@ -289,15 +297,15 @@ static void fill_tso_desc(struct hnae_ri + + /* when the frag size is bigger than hardware, split this frag */ + for (k = 0; k < frag_buf_num; k++) +- fill_v2_desc(ring, priv, +- (k == frag_buf_num - 1) ? ++ fill_v2_desc_hw(ring, priv, k == 0 ? size : 0, ++ (k == frag_buf_num - 1) ? + sizeoflast : BD_MAX_SEND_SIZE, +- dma + BD_MAX_SEND_SIZE * k, +- frag_end && (k == frag_buf_num - 1) ? 1 : 0, +- buf_num, +- (type == DESC_TYPE_SKB && !k) ? ++ dma + BD_MAX_SEND_SIZE * k, ++ frag_end && (k == frag_buf_num - 1) ? 1 : 0, ++ buf_num, ++ (type == DESC_TYPE_SKB && !k) ? + DESC_TYPE_SKB : DESC_TYPE_PAGE, +- mtu); ++ mtu); + } + + netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, diff --git a/queue-4.14/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch b/queue-4.14/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch new file mode 100644 index 00000000000..514633e6ab0 --- /dev/null +++ b/queue-4.14/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch @@ -0,0 +1,218 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Sabrina Dubroca +Date: Tue, 9 Oct 2018 17:48:14 +0200 +Subject: net: ipv4: update fnhe_pmtu when first hop's MTU changes + +From: Sabrina Dubroca + +[ Upstream commit af7d6cce53694a88d6a1bb60c9a239a6a5144459 ] + +Since commit 5aad1de5ea2c ("ipv4: use separate genid for next hop +exceptions"), exceptions get deprecated separately from cached +routes. In particular, administrative changes don't clear PMTU anymore. 
+ +As Stefano described in commit e9fa1495d738 ("ipv6: Reflect MTU changes +on PMTU of exceptions for MTU-less routes"), the PMTU discovered before +the local MTU change can become stale: + - if the local MTU is now lower than the PMTU, that PMTU is now + incorrect + - if the local MTU was the lowest value in the path, and is increased, + we might discover a higher PMTU + +Similarly to what commit e9fa1495d738 did for IPv6, update PMTU in those +cases. + +If the exception was locked, the discovered PMTU was smaller than the +minimal accepted PMTU. In that case, if the new local MTU is smaller +than the current PMTU, let PMTU discovery figure out if locking of the +exception is still needed. + +To do this, we need to know the old link MTU in the NETDEV_CHANGEMTU +notifier. By the time the notifier is called, dev->mtu has been +changed. This patch adds the old MTU as additional information in the +notifier structure, and a new call_netdevice_notifiers_mtu() function. + +Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Stefano Brivio +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 7 ++++++ + include/net/ip_fib.h | 1 + net/core/dev.c | 28 +++++++++++++++++++++++-- + net/ipv4/fib_frontend.c | 12 +++++++---- + net/ipv4/fib_semantics.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++ + 5 files changed, 92 insertions(+), 6 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -2307,6 +2307,13 @@ struct netdev_notifier_info { + struct net_device *dev; + }; + ++struct netdev_notifier_info_ext { ++ struct netdev_notifier_info info; /* must be first */ ++ union { ++ u32 mtu; ++ } ext; ++}; ++ + struct netdev_notifier_change_info { + struct netdev_notifier_info info; /* must be first */ + unsigned int flags_changed; +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -372,6 +372,7 @@ int ip_fib_check_default(__be32 gw, stru + int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); + int fib_sync_down_addr(struct net_device *dev, __be32 local); + int fib_sync_up(struct net_device *dev, unsigned int nh_flags); ++void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); + + #ifdef CONFIG_IP_ROUTE_MULTIPATH + int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1688,6 +1688,28 @@ int call_netdevice_notifiers(unsigned lo + } + EXPORT_SYMBOL(call_netdevice_notifiers); + ++/** ++ * call_netdevice_notifiers_mtu - call all network notifier blocks ++ * @val: value passed unmodified to notifier function ++ * @dev: net_device pointer passed unmodified to notifier function ++ * @arg: additional u32 argument passed to the notifier function ++ * ++ * Call all network notifier blocks. Parameters and return value ++ * are as for raw_notifier_call_chain(). 
++ */ ++static int call_netdevice_notifiers_mtu(unsigned long val, ++ struct net_device *dev, u32 arg) ++{ ++ struct netdev_notifier_info_ext info = { ++ .info.dev = dev, ++ .ext.mtu = arg, ++ }; ++ ++ BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); ++ ++ return call_netdevice_notifiers_info(val, dev, &info.info); ++} ++ + #ifdef CONFIG_NET_INGRESS + static struct static_key ingress_needed __read_mostly; + +@@ -6891,14 +6913,16 @@ int dev_set_mtu(struct net_device *dev, + err = __dev_set_mtu(dev, new_mtu); + + if (!err) { +- err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); ++ err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, ++ orig_mtu); + err = notifier_to_errno(err); + if (err) { + /* setting mtu back and notifying everyone again, + * so that they have a chance to revert changes. + */ + __dev_set_mtu(dev, orig_mtu); +- call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); ++ call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, ++ new_mtu); + } + } + return err; +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -1185,7 +1185,8 @@ static int fib_inetaddr_event(struct not + static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) + { + struct net_device *dev = netdev_notifier_info_to_dev(ptr); +- struct netdev_notifier_changeupper_info *info; ++ struct netdev_notifier_changeupper_info *upper_info = ptr; ++ struct netdev_notifier_info_ext *info_ext = ptr; + struct in_device *in_dev; + struct net *net = dev_net(dev); + unsigned int flags; +@@ -1220,16 +1221,19 @@ static int fib_netdev_event(struct notif + fib_sync_up(dev, RTNH_F_LINKDOWN); + else + fib_sync_down_dev(dev, event, false); +- /* fall through */ ++ rt_cache_flush(net); ++ break; + case NETDEV_CHANGEMTU: ++ fib_sync_mtu(dev, info_ext->ext.mtu); + rt_cache_flush(net); + break; + case NETDEV_CHANGEUPPER: +- info = ptr; ++ upper_info = ptr; + /* flush all routes if dev is linked to or unlinked from + * an L3 master device (e.g., VRF) 
+ */ +- if (info->upper_dev && netif_is_l3_master(info->upper_dev)) ++ if (upper_info->upper_dev && ++ netif_is_l3_master(upper_info->upper_dev)) + fib_disable_ip(dev, NETDEV_DOWN, true); + break; + } +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1520,6 +1520,56 @@ static int call_fib_nh_notifiers(struct + return NOTIFY_DONE; + } + ++/* Update the PMTU of exceptions when: ++ * - the new MTU of the first hop becomes smaller than the PMTU ++ * - the old MTU was the same as the PMTU, and it limited discovery of ++ * larger MTUs on the path. With that limit raised, we can now ++ * discover larger MTUs ++ * A special case is locked exceptions, for which the PMTU is smaller ++ * than the minimal accepted PMTU: ++ * - if the new MTU is greater than the PMTU, don't make any change ++ * - otherwise, unlock and set PMTU ++ */ ++static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) ++{ ++ struct fnhe_hash_bucket *bucket; ++ int i; ++ ++ bucket = rcu_dereference_protected(nh->nh_exceptions, 1); ++ if (!bucket) ++ return; ++ ++ for (i = 0; i < FNHE_HASH_SIZE; i++) { ++ struct fib_nh_exception *fnhe; ++ ++ for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); ++ fnhe; ++ fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { ++ if (fnhe->fnhe_mtu_locked) { ++ if (new <= fnhe->fnhe_pmtu) { ++ fnhe->fnhe_pmtu = new; ++ fnhe->fnhe_mtu_locked = false; ++ } ++ } else if (new < fnhe->fnhe_pmtu || ++ orig == fnhe->fnhe_pmtu) { ++ fnhe->fnhe_pmtu = new; ++ } ++ } ++ } ++} ++ ++void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) ++{ ++ unsigned int hash = fib_devindex_hashfn(dev->ifindex); ++ struct hlist_head *head = &fib_info_devhash[hash]; ++ struct fib_nh *nh; ++ ++ hlist_for_each_entry(nh, head, nh_hash) { ++ if (nh->nh_dev == dev) ++ nh_update_mtu(nh, dev->mtu, orig_mtu); ++ } ++} ++ + /* Event force Flags Description + * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host + * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host 
diff --git a/queue-4.14/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch b/queue-4.14/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch new file mode 100644 index 00000000000..2c3eeea5750 --- /dev/null +++ b/queue-4.14/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch @@ -0,0 +1,61 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Jeff Barnhill <0xeffeff@gmail.com> +Date: Fri, 21 Sep 2018 00:45:27 +0000 +Subject: net/ipv6: Display all addresses in output of /proc/net/if_inet6 + +From: Jeff Barnhill <0xeffeff@gmail.com> + +[ Upstream commit 86f9bd1ff61c413a2a251fa736463295e4e24733 ] + +The backend handling for /proc/net/if_inet6 in addrconf.c doesn't properly +handle starting/stopping the iteration. The problem is that at some point +during the iteration, an overflow is detected and the process is +subsequently stopped. The item being shown via seq_printf() when the +overflow occurs is not actually shown, though. When start() is +subsequently called to resume iterating, it returns the next item, and +thus the item that was being processed when the overflow occurred never +gets printed. + +Alter the meaning of the private data member "offset". Currently, when it +is not 0 (which only happens at the very beginning), "offset" represents +the next hlist item to be printed. After this change, "offset" always +represents the current item. + +This is also consistent with the private data member "bucket", which +represents the current bucket, and also the use of "pos" as defined in +seq_file.txt: + The pos passed to start() will always be either zero, or the most + recent pos used in the previous session. + +Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -4136,7 +4136,6 @@ static struct inet6_ifaddr *if6_get_firs + p++; + continue; + } +- state->offset++; + return ifa; + } + +@@ -4160,13 +4159,12 @@ static struct inet6_ifaddr *if6_get_next + return ifa; + } + ++ state->offset = 0; + while (++state->bucket < IN6_ADDR_HSIZE) { +- state->offset = 0; + hlist_for_each_entry_rcu_bh(ifa, + &inet6_addr_lst[state->bucket], addr_lst) { + if (!net_eq(dev_net(ifa->idev->dev), net)) + continue; +- state->offset++; + return ifa; + } + } diff --git a/queue-4.14/net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch b/queue-4.14/net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch new file mode 100644 index 00000000000..fb6b65e0387 --- /dev/null +++ b/queue-4.14/net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Eran Ben Elisha +Date: Sun, 16 Sep 2018 14:45:27 +0300 +Subject: net/mlx5: E-Switch, Fix out of bound access when setting vport rate + +From: Eran Ben Elisha + +[ Upstream commit 11aa5800ed66ed0415b7509f02881c76417d212a ] + +The code that deals with eswitch vport bw guarantee was going beyond the +eswitch vport array limit, fix that. This was pointed out by the kernel +address sanitizer (KASAN). 
+ +The error from KASAN log: +[2018-09-15 15:04:45] BUG: KASAN: slab-out-of-bounds in +mlx5_eswitch_set_vport_rate+0x8c1/0xae0 [mlx5_core] + +Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate") +Signed-off-by: Eran Ben Elisha +Reviewed-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +@@ -1922,7 +1922,7 @@ static u32 calculate_vports_min_rate_div + u32 max_guarantee = 0; + int i; + +- for (i = 0; i <= esw->total_vports; i++) { ++ for (i = 0; i < esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled || evport->info.min_rate < max_guarantee) + continue; +@@ -1942,7 +1942,7 @@ static int normalize_vports_min_rate(str + int err; + int i; + +- for (i = 0; i <= esw->total_vports; i++) { ++ for (i = 0; i < esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled) + continue; diff --git a/queue-4.14/net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch b/queue-4.14/net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch new file mode 100644 index 00000000000..0d50626847b --- /dev/null +++ b/queue-4.14/net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch @@ -0,0 +1,38 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Jianbo Liu +Date: Sat, 25 Aug 2018 03:29:58 +0000 +Subject: net/mlx5e: Set vlan masks for all offloaded TC rules + +From: Jianbo Liu + +[ Upstream commit cee26487620bc9bc3c7db21b6984d91f7bae12ae ] + +In flow steering, if asked to, the hardware matches on the first ethertype +which is not vlan. It's possible to set a rule as follows, which is meant +to match on untagged packet, but will match on a vlan packet: + tc filter add dev eth0 parent ffff: protocol ip flower ... 
+ +To avoid this for packets with single tag, we set vlan masks to tell +hardware to check the tags for every matched packet. + +Fixes: 095b6cfd69ce ('net/mlx5e: Add TC vlan match parsing') +Signed-off-by: Jianbo Liu +Reviewed-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -864,6 +864,9 @@ static int __parse_cls_flower(struct mlx + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority); + } ++ } else { ++ MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); ++ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { diff --git a/queue-4.14/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch b/queue-4.14/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch new file mode 100644 index 00000000000..29254bb5cfe --- /dev/null +++ b/queue-4.14/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch @@ -0,0 +1,74 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Maxime Chevallier +Date: Fri, 5 Oct 2018 09:04:40 +0200 +Subject: net: mvpp2: Extract the correct ethtype from the skb for tx csum offload + +From: Maxime Chevallier + +[ Upstream commit 35f3625c21852ad839f20c91c7d81c4c1101e207 ] + +When offloading the L3 and L4 csum computation on TX, we need to extract +the l3_proto from the ethtype, independently of the presence of a vlan +tag. + +The actual driver uses skb->protocol as-is, resulting in packets with +the wrong L4 checksum being sent when there's a vlan tag in the packet +header and checksum offloading is enabled. 
+ +This commit makes use of vlan_get_protocol() to get the correct ethtype +regardless of the presence of a vlan tag. + +Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") +Signed-off-by: Maxime Chevallier +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvpp2.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvpp2.c ++++ b/drivers/net/ethernet/marvell/mvpp2.c +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -5101,7 +5102,7 @@ static void mvpp2_txq_desc_put(struct mv + } + + /* Set Tx descriptors fields relevant for CSUM calculation */ +-static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto, ++static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto, + int ip_hdr_len, int l4_proto) + { + u32 command; +@@ -6065,14 +6066,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp + if (skb->ip_summed == CHECKSUM_PARTIAL) { + int ip_hdr_len = 0; + u8 l4_proto; ++ __be16 l3_proto = vlan_get_protocol(skb); + +- if (skb->protocol == htons(ETH_P_IP)) { ++ if (l3_proto == htons(ETH_P_IP)) { + struct iphdr *ip4h = ip_hdr(skb); + + /* Calculate IPv4 checksum and L4 checksum */ + ip_hdr_len = ip4h->ihl; + l4_proto = ip4h->protocol; +- } else if (skb->protocol == htons(ETH_P_IPV6)) { ++ } else if (l3_proto == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = ipv6_hdr(skb); + + /* Read l4_protocol from one of IPv6 extra headers */ +@@ -6084,7 +6086,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp + } + + return mvpp2_txq_desc_csum(skb_network_offset(skb), +- skb->protocol, ip_hdr_len, l4_proto); ++ l3_proto, ip_hdr_len, l4_proto); + } + + return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE; diff --git a/queue-4.14/net-mvpp2-fix-a-txq_done-race-condition.patch b/queue-4.14/net-mvpp2-fix-a-txq_done-race-condition.patch new file mode 100644 index 00000000000..a26cc3e4174 --- /dev/null +++ 
b/queue-4.14/net-mvpp2-fix-a-txq_done-race-condition.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Antoine Tenart +Date: Tue, 18 Sep 2018 16:58:47 +0200 +Subject: net: mvpp2: fix a txq_done race condition + +From: Antoine Tenart + +[ Upstream commit 774268f3e51b53ed432a1ec516574fd5ba469398 ] + +When no Tx IRQ is available, the txq_done() routine (called from +tx_done()) shouldn't be called from the polling function, as in such +case it is already called in the Tx path thanks to an hrtimer. This +mostly occurred when using PPv2.1, as the engine then do not have Tx +IRQs. + +Fixes: edc660fa09e2 ("net: mvpp2: replace TX coalescing interrupts with hrtimer") +Reported-by: Stefan Chulski +Signed-off-by: Antoine Tenart +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvpp2.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvpp2.c ++++ b/drivers/net/ethernet/marvell/mvpp2.c +@@ -6534,10 +6534,12 @@ static int mvpp2_poll(struct napi_struct + cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK); + } + +- cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; +- if (cause_tx) { +- cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET; +- mvpp2_tx_done(port, cause_tx, qv->sw_thread_id); ++ if (port->has_tx_irqs) { ++ cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; ++ if (cause_tx) { ++ cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET; ++ mvpp2_tx_done(port, cause_tx, qv->sw_thread_id); ++ } + } + + /* Process RX packets */ diff --git a/queue-4.14/net-packet-fix-packet-drop-as-of-virtio-gso.patch b/queue-4.14/net-packet-fix-packet-drop-as-of-virtio-gso.patch new file mode 100644 index 00000000000..1a3d8f54680 --- /dev/null +++ b/queue-4.14/net-packet-fix-packet-drop-as-of-virtio-gso.patch @@ -0,0 +1,79 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Jianfeng Tan +Date: Sat, 29 Sep 2018 15:41:27 +0000 +Subject: net/packet: fix 
packet drop as of virtio gso + +From: Jianfeng Tan + +[ Upstream commit 9d2f67e43b73e8af7438be219b66a5de0cfa8bd9 ] + +When we use raw socket as the vhost backend, a packet from virtio with +gso offloading information cannot be sent out in later validation at +xmit path, as we did not set correct skb->protocol which is further used +for looking up the gso function. + +To fix this, we set this field according to virtio hdr information. + +Fixes: e858fae2b0b8f4 ("virtio_net: use common code for virtio_net_hdr and skb GSO conversion") +Signed-off-by: Jianfeng Tan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/virtio_net.h | 18 ++++++++++++++++++ + net/packet/af_packet.c | 11 +++++++---- + 2 files changed, 25 insertions(+), 4 deletions(-) + +--- a/include/linux/virtio_net.h ++++ b/include/linux/virtio_net.h +@@ -5,6 +5,24 @@ + #include + #include + ++static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, ++ const struct virtio_net_hdr *hdr) ++{ ++ switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { ++ case VIRTIO_NET_HDR_GSO_TCPV4: ++ case VIRTIO_NET_HDR_GSO_UDP: ++ skb->protocol = cpu_to_be16(ETH_P_IP); ++ break; ++ case VIRTIO_NET_HDR_GSO_TCPV6: ++ skb->protocol = cpu_to_be16(ETH_P_IPV6); ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ + static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, + const struct virtio_net_hdr *hdr, + bool little_endian) +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2753,10 +2753,12 @@ tpacket_error: + } + } + +- if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr, +- vio_le())) { +- tp_len = -EINVAL; +- goto tpacket_error; ++ if (po->has_vnet_hdr) { ++ if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { ++ tp_len = -EINVAL; ++ goto tpacket_error; ++ } ++ virtio_net_hdr_set_proto(skb, vnet_hdr); + } + + skb->destructor = tpacket_destruct_skb; +@@ -2952,6 +2954,7 @@ static int packet_snd(struct socket *soc + if (err) + goto out_free; + 
len += sizeof(vnet_hdr); ++ virtio_net_hdr_set_proto(skb, &vnet_hdr); + } + + skb_probe_transport_header(skb, reserve); diff --git a/queue-4.14/net-sched-add-policy-validation-for-tc-attributes.patch b/queue-4.14/net-sched-add-policy-validation-for-tc-attributes.patch new file mode 100644 index 00000000000..05bcdb37979 --- /dev/null +++ b/queue-4.14/net-sched-add-policy-validation-for-tc-attributes.patch @@ -0,0 +1,85 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: David Ahern +Date: Wed, 3 Oct 2018 15:05:36 -0700 +Subject: net: sched: Add policy validation for tc attributes + +From: David Ahern + +[ Upstream commit 8b4c3cdd9dd8290343ce959a132d3b334062c5b9 ] + +A number of TC attributes are processed without proper validation +(e.g., length checks). Add a tca policy for all input attributes and use +when invoking nlmsg_parse. + +The 2 Fixes tags below cover the latest additions. The other attributes +are a string (KIND), nested attribute (OPTIONS which does seem to have +validation in most cases), for dumps only or a flag. + +Fixes: 5bc1701881e39 ("net: sched: introduce multichain support for filters") +Fixes: d47a6b0e7c492 ("net: sched: introduce ingress/egress block index attributes for qdisc") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_api.c | 22 ++++++++++++++++++---- + 1 file changed, 18 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -1216,6 +1216,16 @@ check_loop_fn(struct Qdisc *q, unsigned + * Delete/get qdisc. 
+ */ + ++const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { ++ [TCA_KIND] = { .type = NLA_STRING }, ++ [TCA_OPTIONS] = { .type = NLA_NESTED }, ++ [TCA_RATE] = { .type = NLA_BINARY, ++ .len = sizeof(struct tc_estimator) }, ++ [TCA_STAB] = { .type = NLA_NESTED }, ++ [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, ++ [TCA_CHAIN] = { .type = NLA_U32 }, ++}; ++ + static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) + { +@@ -1232,7 +1242,8 @@ static int tc_get_qdisc(struct sk_buff * + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + +- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); ++ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, ++ extack); + if (err < 0) + return err; + +@@ -1302,7 +1313,8 @@ static int tc_modify_qdisc(struct sk_buf + + replay: + /* Reinit, just in case something touches this. */ +- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); ++ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, ++ extack); + if (err < 0) + return err; + +@@ -1512,7 +1524,8 @@ static int tc_dump_qdisc(struct sk_buff + idx = 0; + ASSERT_RTNL(); + +- err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); ++ err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, ++ rtm_tca_policy, NULL); + if (err < 0) + return err; + +@@ -1729,7 +1742,8 @@ static int tc_ctl_tclass(struct sk_buff + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + +- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); ++ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, ++ extack); + if (err < 0) + return err; + diff --git a/queue-4.14/net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch b/queue-4.14/net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch new file mode 100644 index 00000000000..319397df55b --- /dev/null +++ b/queue-4.14/net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch @@ -0,0 +1,66 @@ +From 
foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Jose Abreu +Date: Mon, 17 Sep 2018 09:22:57 +0100 +Subject: net: stmmac: Fixup the tail addr setting in xmit path + +From: Jose Abreu + +[ Upstream commit 0431100b3d82c509729ece1ab22ada2484e209c1 ] + +Currently we are always setting the tail address of descriptor list to +the end of the pre-allocated list. + +According to databook this is not correct. Tail address should point to +the last available descriptor + 1, which means we have to update the +tail address every time we call the xmit function. + +This should make no impact in older versions of MAC but in newer +versions there are some DMA features which allow the IP to fetch +descriptors in advance and in a non-sequential order so it's critical +that we set the tail address correctly. + +Signed-off-by: Jose Abreu +Fixes: f748be531d70 ("stmmac: support new GMAC4") +Cc: David S. Miller +Cc: Joao Pinto +Cc: Giuseppe Cavallaro +Cc: Alexandre Torgue +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -2190,8 +2190,7 @@ static int stmmac_init_dma_engine(struct + priv->plat->dma_cfg, + tx_q->dma_tx_phy, chan); + +- tx_q->tx_tail_addr = tx_q->dma_tx_phy + +- (DMA_TX_SIZE * sizeof(struct dma_desc)); ++ tx_q->tx_tail_addr = tx_q->dma_tx_phy; + priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, + tx_q->tx_tail_addr, + chan); +@@ -2963,6 +2962,7 @@ static netdev_tx_t stmmac_tso_xmit(struc + + netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); + ++ tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); + priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr, + queue); + +@@ -3178,9 +3178,11 @@ static netdev_tx_t stmmac_xmit(struct sk + + if (priv->synopsys_id < DWMAC_CORE_4_00) + 
priv->hw->dma->enable_dma_transmission(priv->ioaddr); +- else ++ else { ++ tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); + priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr, + queue); ++ } + + return NETDEV_TX_OK; + diff --git a/queue-4.14/net-systemport-fix-wake-up-interrupt-race-during-resume.patch b/queue-4.14/net-systemport-fix-wake-up-interrupt-race-during-resume.patch new file mode 100644 index 00000000000..600a7bfa7b4 --- /dev/null +++ b/queue-4.14/net-systemport-fix-wake-up-interrupt-race-during-resume.patch @@ -0,0 +1,90 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Florian Fainelli +Date: Tue, 2 Oct 2018 16:52:03 -0700 +Subject: net: systemport: Fix wake-up interrupt race during resume + +From: Florian Fainelli + +[ Upstream commit 45ec318578c0c22a11f5b9927d064418e1ab1905 ] + +The AON_PM_L2 is normally used to trigger and identify the source of a +wake-up event. Since the RX_SYS clock is no longer turned off, we also +have an interrupt being sent to the SYSTEMPORT INTRL_2_0 controller, and +that interrupt remains active up until the magic packet detector is +disabled which happens much later during the driver resumption. + +The race happens if we have a CPU that is entering the SYSTEMPORT +INTRL2_0 handler during resume, and another CPU has managed to clear the +wake-up interrupt during bcm_sysport_resume_from_wol(). In that case, we +have the first CPU stuck in the interrupt handler with an interrupt +cause that has been cleared under its feet, and so we keep returning +IRQ_NONE and we never make any progress. + +This was not a problem before because we would always turn off the +RX_SYS clock during WoL, so the SYSTEMPORT INTRL2_0 would also be turned +off as well, thus not latching the interrupt. 
+ +The fix is to make sure we do not enable either the MPD or +BRCM_TAG_MATCH interrupts since those are redundant with what the +AON_PM_L2 interrupt controller already processes and they would cause +such a race to occur. + +Fixes: bb9051a2b230 ("net: systemport: Add support for WAKE_FILTER") +Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bcmsysport.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bcmsysport.c ++++ b/drivers/net/ethernet/broadcom/bcmsysport.c +@@ -1001,14 +1001,22 @@ static void bcm_sysport_resume_from_wol( + { + u32 reg; + +- /* Stop monitoring MPD interrupt */ +- intrl2_0_mask_set(priv, INTRL2_0_MPD); +- + /* Clear the MagicPacket detection logic */ + reg = umac_readl(priv, UMAC_MPD_CTRL); + reg &= ~MPD_EN; + umac_writel(priv, reg, UMAC_MPD_CTRL); + ++ reg = intrl2_0_readl(priv, INTRL2_CPU_STATUS); ++ if (reg & INTRL2_0_MPD) ++ netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); ++ ++ if (reg & INTRL2_0_BRCM_MATCH_TAG) { ++ reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & ++ RXCHK_BRCM_TAG_MATCH_MASK; ++ netdev_info(priv->netdev, ++ "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); ++ } ++ + netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n"); + } + +@@ -1043,11 +1051,6 @@ static irqreturn_t bcm_sysport_rx_isr(in + if (priv->irq0_stat & INTRL2_0_TX_RING_FULL) + bcm_sysport_tx_reclaim_all(priv); + +- if (priv->irq0_stat & INTRL2_0_MPD) { +- netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n"); +- bcm_sysport_resume_from_wol(priv); +- } +- + if (!priv->is_lite) + goto out; + +@@ -2248,9 +2251,6 @@ static int bcm_sysport_suspend_to_wol(st + /* UniMAC receive needs to be turned on */ + umac_enable_set(priv, CMD_RX_EN, 1); + +- /* Enable the interrupt wake-up source */ +- intrl2_0_mask_clear(priv, 
INTRL2_0_MPD); +- + netif_dbg(priv, wol, ndev, "entered WOL mode\n"); + + return 0; diff --git a/queue-4.14/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch b/queue-4.14/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch new file mode 100644 index 00000000000..eeb43354dbf --- /dev/null +++ b/queue-4.14/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Yu Zhao +Date: Fri, 28 Sep 2018 17:04:30 -0600 +Subject: net/usb: cancel pending work when unbinding smsc75xx + +From: Yu Zhao + +[ Upstream commit f7b2a56e1f3dcbdb4cf09b2b63e859ffe0e09df8 ] + +Cancel pending work before freeing smsc75xx private data structure +during binding. This fixes the following crash in the driver: + +BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 +IP: mutex_lock+0x2b/0x3f + +Workqueue: events smsc75xx_deferred_multicast_write [smsc75xx] +task: ffff8caa83e85700 task.stack: ffff948b80518000 +RIP: 0010:mutex_lock+0x2b/0x3f + +Call Trace: + smsc75xx_deferred_multicast_write+0x40/0x1af [smsc75xx] + process_one_work+0x18d/0x2fc + worker_thread+0x1a2/0x269 + ? pr_cont_work+0x58/0x58 + kthread+0xfa/0x10a + ? pr_cont_work+0x58/0x58 + ? rcu_read_unlock_sched_notrace+0x48/0x48 + ret_from_fork+0x22/0x40 + +Signed-off-by: Yu Zhao +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/smsc75xx.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/smsc75xx.c ++++ b/drivers/net/usb/smsc75xx.c +@@ -1517,6 +1517,7 @@ static void smsc75xx_unbind(struct usbne + { + struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); + if (pdata) { ++ cancel_work_sync(&pdata->set_multicast); + netif_dbg(dev, ifdown, dev->net, "free pdata\n"); + kfree(pdata); + pdata = NULL; diff --git a/queue-4.14/netlabel-check-for-ipv4mask-in-addrinfo_get.patch b/queue-4.14/netlabel-check-for-ipv4mask-in-addrinfo_get.patch new file mode 100644 index 00000000000..b13aa24968a --- /dev/null +++ b/queue-4.14/netlabel-check-for-ipv4mask-in-addrinfo_get.patch @@ -0,0 +1,61 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Sean Tranchetti +Date: Thu, 20 Sep 2018 14:29:45 -0600 +Subject: netlabel: check for IPV4MASK in addrinfo_get + +From: Sean Tranchetti + +[ Upstream commit f88b4c01b97e09535505cf3c327fdbce55c27f00 ] + +netlbl_unlabel_addrinfo_get() assumes that if it finds the +NLBL_UNLABEL_A_IPV4ADDR attribute, it must also have the +NLBL_UNLABEL_A_IPV4MASK attribute as well. However, this is +not necessarily the case as the current checks in +netlbl_unlabel_staticadd() and friends are not sufficient to +enforce this. 
+ +If passed a netlink message with NLBL_UNLABEL_A_IPV4ADDR, +NLBL_UNLABEL_A_IPV6ADDR, and NLBL_UNLABEL_A_IPV6MASK attributes, +these functions will all call netlbl_unlabel_addrinfo_get() which +will then attempt dereference NULL when fetching the non-existent +NLBL_UNLABEL_A_IPV4MASK attribute: + +Unable to handle kernel NULL pointer dereference at virtual address 0 +Process unlab (pid: 31762, stack limit = 0xffffff80502d8000) +Call trace: + netlbl_unlabel_addrinfo_get+0x44/0xd8 + netlbl_unlabel_staticremovedef+0x98/0xe0 + genl_rcv_msg+0x354/0x388 + netlink_rcv_skb+0xac/0x118 + genl_rcv+0x34/0x48 + netlink_unicast+0x158/0x1f0 + netlink_sendmsg+0x32c/0x338 + sock_sendmsg+0x44/0x60 + ___sys_sendmsg+0x1d0/0x2a8 + __sys_sendmsg+0x64/0xb4 + SyS_sendmsg+0x34/0x4c + el0_svc_naked+0x34/0x38 +Code: 51001149 7100113f 540000a0 f9401508 (79400108) +---[ end trace f6438a488e737143 ]--- +Kernel panic - not syncing: Fatal exception + +Signed-off-by: Sean Tranchetti + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlabel/netlabel_unlabeled.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/netlabel/netlabel_unlabeled.c ++++ b/net/netlabel/netlabel_unlabeled.c +@@ -781,7 +781,8 @@ static int netlbl_unlabel_addrinfo_get(s + { + u32 addr_len; + +- if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) { ++ if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && ++ info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { + addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); + if (addr_len != sizeof(struct in_addr) && + addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) diff --git a/queue-4.14/nfp-avoid-soft-lockups-under-control-message-storm.patch b/queue-4.14/nfp-avoid-soft-lockups-under-control-message-storm.patch new file mode 100644 index 00000000000..58cecb6cb1b --- /dev/null +++ b/queue-4.14/nfp-avoid-soft-lockups-under-control-message-storm.patch @@ -0,0 +1,63 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Jakub Kicinski +Date: Tue, 2 Oct 2018 
10:10:14 -0700 +Subject: nfp: avoid soft lockups under control message storm + +From: Jakub Kicinski + +[ Upstream commit ff58e2df62ce29d0552278c290ae494b30fe0c6f ] + +When FW floods the driver with control messages try to exit the cmsg +processing loop every now and then to avoid soft lockups. Cmsg +processing is generally very lightweight so 512 seems like a reasonable +budget, which should not be exceeded under normal conditions. + +Fixes: 77ece8d5f196 ("nfp: add control vNIC datapath") +Signed-off-by: Jakub Kicinski +Reviewed-by: Simon Horman +Tested-by: Pieter Jansen van Vuuren +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +@@ -2058,14 +2058,17 @@ nfp_ctrl_rx_one(struct nfp_net *nn, stru + return true; + } + +-static void nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) ++static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) + { + struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; + struct nfp_net *nn = r_vec->nfp_net; + struct nfp_net_dp *dp = &nn->dp; ++ unsigned int budget = 512; + +- while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring)) ++ while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) + continue; ++ ++ return budget; + } + + static void nfp_ctrl_poll(unsigned long arg) +@@ -2077,9 +2080,13 @@ static void nfp_ctrl_poll(unsigned long + __nfp_ctrl_tx_queued(r_vec); + spin_unlock_bh(&r_vec->lock); + +- nfp_ctrl_rx(r_vec); +- +- nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); ++ if (nfp_ctrl_rx(r_vec)) { ++ nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); ++ } else { ++ tasklet_schedule(&r_vec->tasklet); ++ nn_dp_warn(&r_vec->nfp_net->dp, ++ "control message budget exceeded!\n"); ++ } + } + + /* Setup and Configuration diff --git 
a/queue-4.14/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch b/queue-4.14/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch new file mode 100644 index 00000000000..df0719885d6 --- /dev/null +++ b/queue-4.14/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch @@ -0,0 +1,149 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Shahed Shaikh +Date: Wed, 26 Sep 2018 12:41:10 -0700 +Subject: qlcnic: fix Tx descriptor corruption on 82xx devices + +From: Shahed Shaikh + +[ Upstream commit c333fa0c4f220f8f7ea5acd6b0ebf3bf13fd684d ] + +In regular NIC transmission flow, driver always configures MAC using +Tx queue zero descriptor as a part of MAC learning flow. +But with multi Tx queue supported NIC, regular transmission can occur on +any non-zero Tx queue and from that context it uses +Tx queue zero descriptor to configure MAC, at the same time TX queue +zero could be used by another CPU for regular transmission +which could lead to Tx queue zero descriptor corruption and cause FW +abort. + +This patch fixes this in such a way that driver always configures +learned MAC address from the same Tx queue which is used for +regular transmission. + +Fixes: 7e2cf4feba05 ("qlcnic: change driver hardware interface mechanism") +Signed-off-by: Shahed Shaikh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 8 +++++--- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 3 ++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h | 3 ++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h | 3 ++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 12 ++++++------ + 5 files changed, 17 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +@@ -1800,7 +1800,8 @@ struct qlcnic_hardware_ops { + int (*config_loopback) (struct qlcnic_adapter *, u8); + int (*clear_loopback) (struct qlcnic_adapter *, u8); + int (*config_promisc_mode) (struct qlcnic_adapter *, u32); +- void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16); ++ void (*change_l2_filter)(struct qlcnic_adapter *adapter, u64 *addr, ++ u16 vlan, struct qlcnic_host_tx_ring *tx_ring); + int (*get_board_info) (struct qlcnic_adapter *); + void (*set_mac_filter_count) (struct qlcnic_adapter *); + void (*free_mac_list) (struct qlcnic_adapter *); +@@ -2064,9 +2065,10 @@ static inline int qlcnic_nic_set_promisc + } + + static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter, +- u64 *addr, u16 id) ++ u64 *addr, u16 vlan, ++ struct qlcnic_host_tx_ring *tx_ring) + { +- adapter->ahw->hw_ops->change_l2_filter(adapter, addr, id); ++ adapter->ahw->hw_ops->change_l2_filter(adapter, addr, vlan, tx_ring); + } + + static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter) +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +@@ -2134,7 +2134,8 @@ out: + } + + void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, +- u16 vlan_id) ++ u16 vlan_id, ++ struct qlcnic_host_tx_ring *tx_ring) + { + u8 mac[ETH_ALEN]; + memcpy(&mac, addr, ETH_ALEN); +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h ++++ 
b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +@@ -550,7 +550,8 @@ int qlcnic_83xx_wrt_reg_indirect(struct + int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32); + int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int); + int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int); +-void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16); ++void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, ++ u16 vlan, struct qlcnic_host_tx_ring *ring); + int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *); + int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *); + void qlcnic_83xx_initialize_nic(struct qlcnic_adapter *, int); +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h +@@ -173,7 +173,8 @@ int qlcnic_82xx_napi_add(struct qlcnic_a + struct net_device *netdev); + void qlcnic_82xx_get_beacon_state(struct qlcnic_adapter *); + void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, +- u64 *uaddr, u16 vlan_id); ++ u64 *uaddr, u16 vlan_id, ++ struct qlcnic_host_tx_ring *tx_ring); + int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *, + struct ethtool_coalesce *); + int qlcnic_82xx_set_rx_coalesce(struct qlcnic_adapter *); +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +@@ -268,13 +268,12 @@ static void qlcnic_add_lb_filter(struct + } + + void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, +- u16 vlan_id) ++ u16 vlan_id, struct qlcnic_host_tx_ring *tx_ring) + { + struct cmd_desc_type0 *hwdesc; + struct qlcnic_nic_req *req; + struct qlcnic_mac_req *mac_req; + struct qlcnic_vlan_req *vlan_req; +- struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring; + u32 producer; + u64 word; + +@@ -301,7 +300,8 @@ void qlcnic_82xx_change_filter(struct ql + + static void qlcnic_send_filter(struct qlcnic_adapter *adapter, + struct 
cmd_desc_type0 *first_desc, +- struct sk_buff *skb) ++ struct sk_buff *skb, ++ struct qlcnic_host_tx_ring *tx_ring) + { + struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data); + struct ethhdr *phdr = (struct ethhdr *)(skb->data); +@@ -335,7 +335,7 @@ static void qlcnic_send_filter(struct ql + tmp_fil->vlan_id == vlan_id) { + if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) + qlcnic_change_filter(adapter, &src_addr, +- vlan_id); ++ vlan_id, tx_ring); + tmp_fil->ftime = jiffies; + return; + } +@@ -350,7 +350,7 @@ static void qlcnic_send_filter(struct ql + if (!fil) + return; + +- qlcnic_change_filter(adapter, &src_addr, vlan_id); ++ qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring); + fil->ftime = jiffies; + fil->vlan_id = vlan_id; + memcpy(fil->faddr, &src_addr, ETH_ALEN); +@@ -766,7 +766,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_ + } + + if (adapter->drv_mac_learn) +- qlcnic_send_filter(adapter, first_desc, skb); ++ qlcnic_send_filter(adapter, first_desc, skb, tx_ring); + + tx_ring->tx_stats.tx_bytes += skb->len; + tx_ring->tx_stats.xmit_called++; diff --git a/queue-4.14/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch b/queue-4.14/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch new file mode 100644 index 00000000000..9e9755a6480 --- /dev/null +++ b/queue-4.14/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch @@ -0,0 +1,30 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Giacinto Cifelli +Date: Wed, 10 Oct 2018 20:05:53 +0200 +Subject: qmi_wwan: Added support for Gemalto's Cinterion ALASxx WWAN interface + +From: Giacinto Cifelli + +[ Upstream commit 4f7617705bfff84d756fe4401a1f4f032f374984 ] + +Added support for Gemalto's Cinterion ALASxx WWAN interfaces +by adding QMI_FIXED_INTF with Cinterion's VID and PID. + +Signed-off-by: Giacinto Cifelli +Acked-by: Bjørn Mork +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -1233,6 +1233,7 @@ static const struct usb_device_id produc + {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ + {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ + {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ ++ {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */ + {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ + {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ + {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ diff --git a/queue-4.14/rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch b/queue-4.14/rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch new file mode 100644 index 00000000000..8d61c233a02 --- /dev/null +++ b/queue-4.14/rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch @@ -0,0 +1,167 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Mauricio Faria de Oliveira +Date: Mon, 1 Oct 2018 22:46:40 -0300 +Subject: rtnetlink: fix rtnl_fdb_dump() for ndmsg header + +From: Mauricio Faria de Oliveira + +[ Upstream commit bd961c9bc66497f0c63f4ba1d02900bb85078366 ] + +Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg', +which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh'). + +The problem is, the function bails out early if nlmsg_parse() fails, which +does occur for iproute2 usage of 'struct ndmsg' because the payload length +is shorter than the family header alone (as 'struct ifinfomsg' is assumed). + +This breaks backward compatibility with userspace -- nothing is sent back. 
+ +Some examples with iproute2 and netlink library for go [1]: + + 1) $ bridge fdb show + 33:33:00:00:00:01 dev ens3 self permanent + 01:00:5e:00:00:01 dev ens3 self permanent + 33:33:ff:15:98:30 dev ens3 self permanent + + This one works, as it uses 'struct ifinfomsg'. + + fdb_show() @ iproute2/bridge/fdb.c + """ + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + ... + if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...] + """ + + 2) $ ip --family bridge neigh + RTNETLINK answers: Invalid argument + Dump terminated + + This one fails, as it uses 'struct ndmsg'. + + do_show_or_flush() @ iproute2/ip/ipneigh.c + """ + .n.nlmsg_type = RTM_GETNEIGH, + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), + """ + + 3) $ ./neighlist + < no output > + + This one fails, as it uses a 'struct ndmsg'-based header. + + neighList() @ netlink/neigh_linux.go + """ + req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...] + msg := Ndmsg{ + """ + +The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink: +bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails +if the payload length (with the _actual_ family header) is less than the +family header length alone (which is assumed, in parameter 'hdrlen'). +This is true in the examples above with struct ndmsg, with size and payload +length shorter than struct ifinfomsg. + +However, that commit just intends to fix something under the assumption the +family header is indeed a 'struct ifinfomsg' - by preventing access to the +payload as such (via 'ifm' pointer) if the payload length is not sufficient +to actually contain it. + +The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump +interface at par with brctl"), to support iproute2's 'bridge fdb' command +(not 'ip neigh') which indeed uses 'struct ifinfomsg', thus is not broken.
+ +So, in order to unbreak the 'struct ndmsg' family headers and still allow +'struct ifinfomsg' to continue to work, check for the known message sizes +used with 'struct ndmsg' in iproute2 (with zero or one attribute which is +not used in this function anyway) then do not parse the data as ifinfomsg. + +Same examples with this patch applied (or revert/before the original fix): + + $ bridge fdb show + 33:33:00:00:00:01 dev ens3 self permanent + 01:00:5e:00:00:01 dev ens3 self permanent + 33:33:ff:15:98:30 dev ens3 self permanent + + $ ip --family bridge neigh + dev ens3 lladdr 33:33:00:00:00:01 PERMANENT + dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT + dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT + + $ ./neighlist + netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} + netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} + netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} + +Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068). + +References: + +[1] netlink library for go (test-case) + https://github.com/vishvananda/netlink + + $ cat ~/go/src/neighlist/main.go + package main + import ("fmt"; "syscall"; "github.com/vishvananda/netlink") + func main() { + neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE) + for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) } + } + + $ export GOPATH=~/go + $ go get github.com/vishvananda/netlink + $ go build neighlist + $ ~/go/src/neighlist/neighlist + +Thanks to David Ahern for suggestions to improve this patch. 
+ +Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error") +Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl") +Reported-by: Aidan Obley +Signed-off-by: Mauricio Faria de Oliveira +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 29 ++++++++++++++++++++--------- + 1 file changed, 20 insertions(+), 9 deletions(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -3292,16 +3292,27 @@ static int rtnl_fdb_dump(struct sk_buff + int err = 0; + int fidx = 0; + +- err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, +- IFLA_MAX, ifla_policy, NULL); +- if (err < 0) { +- return -EINVAL; +- } else if (err == 0) { +- if (tb[IFLA_MASTER]) +- br_idx = nla_get_u32(tb[IFLA_MASTER]); +- } ++ /* A hack to preserve kernel<->userspace interface. ++ * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0. ++ * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails. ++ * So, check for ndmsg with an optional u32 attribute (not used here). ++ * Fortunately these sizes don't conflict with the size of ifinfomsg ++ * with an optional attribute. 
++ */ ++ if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) && ++ (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) + ++ nla_attr_size(sizeof(u32)))) { ++ err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, ++ IFLA_MAX, ifla_policy, NULL); ++ if (err < 0) { ++ return -EINVAL; ++ } else if (err == 0) { ++ if (tb[IFLA_MASTER]) ++ br_idx = nla_get_u32(tb[IFLA_MASTER]); ++ } + +- brport_idx = ifm->ifi_index; ++ brport_idx = ifm->ifi_index; ++ } + + if (br_idx) { + br_dev = __dev_get_by_index(net, br_idx); diff --git a/queue-4.14/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch b/queue-4.14/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch new file mode 100644 index 00000000000..231cc83abb6 --- /dev/null +++ b/queue-4.14/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch @@ -0,0 +1,54 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Eric Dumazet +Date: Tue, 2 Oct 2018 15:47:35 -0700 +Subject: rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096 + +From: Eric Dumazet + +[ Upstream commit 0e1d6eca5113858ed2caea61a5adc03c595f6096 ] + +We have an impressive number of syzkaller bugs that are linked +to the fact that syzbot was able to create a networking device +with millions of TX (or RX) queues. + +Let's limit the number of RX/TX queues to 4096, this really should +cover all known cases. + +A separate patch will add various cond_resched() in the loops +handling sysfs entries at device creation and dismantle. + +Tested: + +lpaa6:~# ip link add gre-4097 numtxqueues 4097 numrxqueues 4097 type ip6gretap +RTNETLINK answers: Invalid argument + +lpaa6:~# time ip link add gre-4096 numtxqueues 4096 numrxqueues 4096 type ip6gretap + +real 0m0.180s +user 0m0.000s +sys 0m0.107s + +Fixes: 76ff5cc91935 ("rtnl: allow to specify number of rx and tx queues on device creation") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -2430,6 +2430,12 @@ struct net_device *rtnl_create_link(stru + else if (ops->get_num_rx_queues) + num_rx_queues = ops->get_num_rx_queues(); + ++ if (num_tx_queues < 1 || num_tx_queues > 4096) ++ return ERR_PTR(-EINVAL); ++ ++ if (num_rx_queues < 1 || num_rx_queues > 4096) ++ return ERR_PTR(-EINVAL); ++ + dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, + ops->setup, num_tx_queues, num_rx_queues); + if (!dev) diff --git a/queue-4.14/sctp-update-dst-pmtu-with-the-correct-daddr.patch b/queue-4.14/sctp-update-dst-pmtu-with-the-correct-daddr.patch new file mode 100644 index 00000000000..625b58b0e37 --- /dev/null +++ b/queue-4.14/sctp-update-dst-pmtu-with-the-correct-daddr.patch @@ -0,0 +1,65 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Xin Long +Date: Thu, 20 Sep 2018 17:27:28 +0800 +Subject: sctp: update dst pmtu with the correct daddr + +From: Xin Long + +[ Upstream commit d7ab5cdce54da631f0c8c11e506c974536a3581e ] + +When processing pmtu update from an icmp packet, it calls .update_pmtu +with sk instead of skb in sctp_transport_update_pmtu. + +However for sctp, the daddr in the transport might be different from +inet_sock->inet_daddr or sk->sk_v6_daddr, which is used to update or +create the route cache. The incorrect daddr will cause a different +route cache created for the path. + +So before calling .update_pmtu, inet_sock->inet_daddr/sk->sk_v6_daddr +should be updated with the daddr in the transport, and update it back +after it's done. + +The issue has existed since route exceptions introduction. + +Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions.") +Reported-by: ian.periam@dialogic.com +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/transport.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/net/sctp/transport.c ++++ b/net/sctp/transport.c +@@ -254,6 +254,7 @@ void sctp_transport_pmtu(struct sctp_tra + bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) + { + struct dst_entry *dst = sctp_transport_dst_check(t); ++ struct sock *sk = t->asoc->base.sk; + bool change = true; + + if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { +@@ -265,12 +266,19 @@ bool sctp_transport_update_pmtu(struct s + pmtu = SCTP_TRUNC4(pmtu); + + if (dst) { +- dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); ++ struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family); ++ union sctp_addr addr; ++ ++ pf->af->from_sk(&addr, sk); ++ pf->to_sk_daddr(&t->ipaddr, sk); ++ dst->ops->update_pmtu(dst, sk, NULL, pmtu); ++ pf->to_sk_daddr(&addr, sk); ++ + dst = sctp_transport_dst_check(t); + } + + if (!dst) { +- t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); ++ t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); + dst = t->dst; + } + diff --git a/queue-4.14/series b/queue-4.14/series new file mode 100644 index 00000000000..0d1330ac239 --- /dev/null +++ b/queue-4.14/series @@ -0,0 +1,38 @@ +bnxt_en-fix-tx-timeout-during-netpoll.patch +bnxt_en-free-hwrm-resources-if-driver-probe-fails.patch +bonding-avoid-possible-dead-lock.patch +ip6_tunnel-be-careful-when-accessing-the-inner-header.patch +ip_tunnel-be-careful-when-accessing-the-inner-header.patch +ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch +ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch +net-dsa-bcm_sf2-call-setup-during-switch-resume.patch +net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch +net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch +net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch +netlabel-check-for-ipv4mask-in-addrinfo_get.patch 
+net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch +net-mvpp2-fix-a-txq_done-race-condition.patch +net-sched-add-policy-validation-for-tc-attributes.patch +net-systemport-fix-wake-up-interrupt-race-during-resume.patch +net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch +qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch +qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch +rtnetlink-fix-rtnl_fdb_dump-for-ndmsg-header.patch +rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch +sctp-update-dst-pmtu-with-the-correct-daddr.patch +team-forbid-enslaving-team-device-to-itself.patch +tipc-fix-flow-control-accounting-for-implicit-connect.patch +udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch +net-stmmac-fixup-the-tail-addr-setting-in-xmit-path.patch +net-packet-fix-packet-drop-as-of-virtio-gso.patch +net-dsa-bcm_sf2-fix-unbind-ordering.patch +net-mlx5e-set-vlan-masks-for-all-offloaded-tc-rules.patch +net-aquantia-memory-corruption-on-jumbo-frames.patch +net-mlx5-e-switch-fix-out-of-bound-access-when-setting-vport-rate.patch +bonding-pass-link-local-packets-to-bonding-master-also.patch +bonding-fix-warning-message.patch +nfp-avoid-soft-lockups-under-control-message-storm.patch +bnxt_en-don-t-try-to-offload-vlan-modify-action.patch +net-ethtool-ethtool_gufo-did-not-and-should-not-require-cap_net_admin.patch +tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch +inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch diff --git a/queue-4.14/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch b/queue-4.14/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch new file mode 100644 index 00000000000..944d9a79e1c --- /dev/null +++ b/queue-4.14/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch @@ -0,0 +1,85 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Eric Dumazet +Date: Mon, 1 Oct 2018 15:02:26 -0700 +Subject: tcp/dccp: fix lockdep issue 
when SYN is backlogged + +From: Eric Dumazet + +[ Upstream commit 1ad98e9d1bdf4724c0a8532fabd84bf3c457c2bc ] + +In normal SYN processing, packets are handled without listener +lock and in RCU protected ingress path. + +But syzkaller is known to be able to trick us and SYN +packets might be processed in process context, after being +queued into socket backlog. + +In commit 06f877d613be ("tcp/dccp: fix other lockdep splats +accessing ireq_opt") I made a very stupid fix, that happened +to work mostly because of the regular path being RCU protected. + +Really the thing protecting ireq->ireq_opt is RCU read lock, +and the pseudo request refcnt is not relevant. + +This patch extends what I did in commit 449809a66c1d ("tcp/dccp: +block BH for SYN processing") by adding an extra rcu_read_{lock|unlock} +pair in the paths that might be taken when processing SYN from +socket backlog (thus possibly in process context) + +Fixes: 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 3 +-- + net/dccp/input.c | 4 +++- + net/ipv4/tcp_input.c | 4 +++- + 3 files changed, 7 insertions(+), 4 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -131,8 +131,7 @@ static inline int inet_request_bound_dev + + static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) + { +- return rcu_dereference_check(ireq->ireq_opt, +- refcount_read(&ireq->req.rsk_refcnt) > 0); ++ return rcu_dereference(ireq->ireq_opt); + } + + struct inet_cork { +--- a/net/dccp/input.c ++++ b/net/dccp/input.c +@@ -605,11 +605,13 @@ int dccp_rcv_state_process(struct sock * + if (sk->sk_state == DCCP_LISTEN) { + if (dh->dccph_type == DCCP_PKT_REQUEST) { + /* It is possible that we process SYN packets from backlog, +- * so we need to make sure to disable BH right there. 
++ * so we need to make sure to disable BH and RCU right there. + */ ++ rcu_read_lock(); + local_bh_disable(); + acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); ++ rcu_read_unlock(); + if (!acceptable) + return 1; + consume_skb(skb); +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5913,11 +5913,13 @@ int tcp_rcv_state_process(struct sock *s + if (th->fin) + goto discard; + /* It is possible that we process SYN packets from backlog, +- * so we need to make sure to disable BH right there. ++ * so we need to make sure to disable BH and RCU right there. + */ ++ rcu_read_lock(); + local_bh_disable(); + acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); ++ rcu_read_unlock(); + + if (!acceptable) + return 1; diff --git a/queue-4.14/team-forbid-enslaving-team-device-to-itself.patch b/queue-4.14/team-forbid-enslaving-team-device-to-itself.patch new file mode 100644 index 00000000000..71b330f2e2a --- /dev/null +++ b/queue-4.14/team-forbid-enslaving-team-device-to-itself.patch @@ -0,0 +1,124 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Ido Schimmel +Date: Mon, 1 Oct 2018 12:21:59 +0300 +Subject: team: Forbid enslaving team device to itself + +From: Ido Schimmel + +[ Upstream commit 471b83bd8bbe4e89743683ef8ecb78f7029d8288 ] + +team's ndo_add_slave() acquires 'team->lock' and later tries to open the +newly enslaved device via dev_open(). This emits a 'NETDEV_UP' event +that causes the VLAN driver to add VLAN 0 on the team device. team's +ndo_vlan_rx_add_vid() will also try to acquire 'team->lock' and +deadlock. + +Fix this by checking early at the enslavement function that a team +device is not being enslaved to itself. + +A similar check was added to the bond driver in commit 09a89c219baf +("bonding: disallow enslaving a bond to itself"). 
+ +WARNING: possible recursive locking detected +4.18.0-rc7+ #176 Not tainted +-------------------------------------------- +syz-executor4/6391 is trying to acquire lock: +(____ptrval____) (&team->lock){+.+.}, at: team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 + +but task is already holding lock: +(____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 + +other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(&team->lock); + lock(&team->lock); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + +2 locks held by syz-executor4/6391: + #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] + #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4662 + #1: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 + +stack backtrace: +CPU: 1 PID: 6391 Comm: syz-executor4 Not tainted 4.18.0-rc7+ #176 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 + print_deadlock_bug kernel/locking/lockdep.c:1765 [inline] + check_deadlock kernel/locking/lockdep.c:1809 [inline] + validate_chain kernel/locking/lockdep.c:2405 [inline] + __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435 + lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924 + __mutex_lock_common kernel/locking/mutex.c:757 [inline] + __mutex_lock+0x176/0x1820 kernel/locking/mutex.c:894 + mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:909 + team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 + vlan_add_rx_filter_info+0x14a/0x1d0 net/8021q/vlan_core.c:210 + __vlan_vid_add net/8021q/vlan_core.c:278 [inline] + vlan_vid_add+0x63e/0x9d0 net/8021q/vlan_core.c:308 + vlan_device_event.cold.12+0x2a/0x2f net/8021q/vlan.c:381 + 
notifier_call_chain+0x180/0x390 kernel/notifier.c:93 + __raw_notifier_call_chain kernel/notifier.c:394 [inline] + raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 + call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735 + call_netdevice_notifiers net/core/dev.c:1753 [inline] + dev_open+0x173/0x1b0 net/core/dev.c:1433 + team_port_add drivers/net/team/team.c:1219 [inline] + team_add_slave+0xa8b/0x1c30 drivers/net/team/team.c:1948 + do_set_master+0x1c9/0x220 net/core/rtnetlink.c:2248 + do_setlink+0xba4/0x3e10 net/core/rtnetlink.c:2382 + rtnl_setlink+0x2a9/0x400 net/core/rtnetlink.c:2636 + rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4665 + netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2455 + rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4683 + netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] + netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 + netlink_sendmsg+0xa18/0xfd0 net/netlink/af_netlink.c:1908 + sock_sendmsg_nosec net/socket.c:642 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:652 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2126 + __sys_sendmsg+0x11d/0x290 net/socket.c:2164 + __do_sys_sendmsg net/socket.c:2173 [inline] + __se_sys_sendmsg net/socket.c:2171 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2171 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x456b29 +Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f9706bf8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f9706bf96d4 RCX: 0000000000456b29 +RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000004 +RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 00000000004d3548 R14: 00000000004c8227 R15: 
0000000000000000 + +Fixes: 87002b03baab ("net: introduce vlan_vid_[add/del] and use them instead of direct [add/kill]_vid ndo calls") +Signed-off-by: Ido Schimmel +Reported-and-tested-by: syzbot+bd051aba086537515cdb@syzkaller.appspotmail.com +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1165,6 +1165,11 @@ static int team_port_add(struct team *te + return -EBUSY; + } + ++ if (dev == port_dev) { ++ netdev_err(dev, "Cannot enslave team device to itself\n"); ++ return -EINVAL; ++ } ++ + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && + vlan_uses_dev(dev)) { + netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n", diff --git a/queue-4.14/tipc-fix-flow-control-accounting-for-implicit-connect.patch b/queue-4.14/tipc-fix-flow-control-accounting-for-implicit-connect.patch new file mode 100644 index 00000000000..b6756592d9c --- /dev/null +++ b/queue-4.14/tipc-fix-flow-control-accounting-for-implicit-connect.patch @@ -0,0 +1,41 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Parthasarathy Bhuvaragan +Date: Tue, 25 Sep 2018 18:21:58 +0200 +Subject: tipc: fix flow control accounting for implicit connect + +From: Parthasarathy Bhuvaragan + +[ Upstream commit 92ef12b32feab8f277b69e9fb89ede2796777f4d ] + +In the case of implicit connect message with data > 1K, the flow +control accounting is incorrect. At this state, the socket does not +know the peer node's capability and falls back to legacy flow control +by returning 1, however the receiver of this message will perform the +new block accounting. This leads to a slack and eventually traffic +disturbance. + +In this commit, we perform tipc_node_get_capabilities() at implicit +connect and perform accounting based on the peer's capability. + +Signed-off-by: Parthasarathy Bhuvaragan +Signed-off-by: Jon Maloy +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/socket.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -1063,8 +1063,10 @@ static int __tipc_sendstream(struct sock + /* Handle implicit connection setup */ + if (unlikely(dest)) { + rc = __tipc_sendmsg(sock, m, dlen); +- if (dlen && (dlen == rc)) ++ if (dlen && dlen == rc) { ++ tsk->peer_caps = tipc_node_get_capabilities(net, dnode); + tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); ++ } + return rc; + } + diff --git a/queue-4.14/udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch b/queue-4.14/udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch new file mode 100644 index 00000000000..496a5765483 --- /dev/null +++ b/queue-4.14/udp-unbreak-modules-that-rely-on-external-__skb_recv_udp-availability.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Oct 16 07:06:52 CEST 2018 +From: Jiri Kosina +Date: Thu, 4 Oct 2018 13:37:32 +0200 +Subject: udp: Unbreak modules that rely on external __skb_recv_udp() availability + +From: Jiri Kosina + +[ Upstream commit 7e823644b60555f70f241274b8d0120dd919269a ] + +Commit 2276f58ac589 ("udp: use a separate rx queue for packet reception") +turned static inline __skb_recv_udp() from being a trivial helper around +__skb_recv_datagram() into a UDP specific implementation, making it +EXPORT_SYMBOL_GPL() at the same time. + +There are external modules that got broken by __skb_recv_udp() not being +visible to them. Let's unbreak them by making __skb_recv_udp EXPORT_SYMBOL(). + +Rationale (one of those) why this is actually "technically correct" thing +to do: __skb_recv_udp() used to be an inline wrapper around +__skb_recv_datagram(), which itself (still, and correctly so, I believe) +is EXPORT_SYMBOL(). + +Cc: Paolo Abeni +Cc: Eric Dumazet +Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception") +Signed-off-by: Jiri Kosina +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1565,7 +1565,7 @@ busy_check: + *err = error; + return NULL; + } +-EXPORT_SYMBOL_GPL(__skb_recv_udp); ++EXPORT_SYMBOL(__skb_recv_udp); + + /* + * This should be easy, if there is something there we