From: Greg Kroah-Hartman Date: Tue, 16 Oct 2018 15:13:29 +0000 (+0200) Subject: 4.9-stable patches X-Git-Tag: v4.9.134~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=35c484d575d3442d0bbc71eadd136e73773e5ea0;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: bnxt_en-fix-tx-timeout-during-netpoll.patch bonding-avoid-possible-dead-lock.patch inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch ip6_tunnel-be-careful-when-accessing-the-inner-header.patch ip_tunnel-be-careful-when-accessing-the-inner-header.patch ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch net-dsa-bcm_sf2-call-setup-during-switch-resume.patch net-dsa-bcm_sf2-fix-unbind-ordering.patch net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch net-systemport-fix-wake-up-interrupt-race-during-resume.patch net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch netlabel-check-for-ipv4mask-in-addrinfo_get.patch qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch team-forbid-enslaving-team-device-to-itself.patch --- diff --git a/queue-4.9/bnxt_en-fix-tx-timeout-during-netpoll.patch b/queue-4.9/bnxt_en-fix-tx-timeout-during-netpoll.patch new file mode 100644 index 00000000000..6472eac47e1 --- /dev/null +++ b/queue-4.9/bnxt_en-fix-tx-timeout-during-netpoll.patch @@ -0,0 +1,73 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Michael Chan +Date: Wed, 26 Sep 2018 00:41:04 -0400 +Subject: bnxt_en: Fix TX timeout during netpoll. 
+ +From: Michael Chan + +[ Upstream commit 73f21c653f930f438d53eed29b5e4c65c8a0f906 ] + +The current netpoll implementation in the bnxt_en driver has problems +that may miss TX completion events. bnxt_poll_work() in effect is +only handling at most 1 TX packet before exiting. In addition, +there may be in flight TX completions that ->poll() may miss even +after we fix bnxt_poll_work() to handle all visible TX completions. +netpoll may not call ->poll() again and HW may not generate IRQ +because the driver does not ARM the IRQ when the budget (0 for netpoll) +is reached. + +We fix it by handling all TX completions and by always ARMing the IRQ +when we exit ->poll() with 0 budget. + +Also, the logic to ACK the completion ring in case it is almost filled +with TX completions needs to be adjusted to take care of the 0 budget +case, as discussed with Eric Dumazet. + +Reported-by: Song Liu +Reviewed-by: Song Liu +Tested-by: Song Liu +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -1666,8 +1666,11 @@ static int bnxt_poll_work(struct bnxt *b + if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { + tx_pkts++; + /* return full budget so NAPI will complete. 
*/ +- if (unlikely(tx_pkts > bp->tx_wake_thresh)) ++ if (unlikely(tx_pkts > bp->tx_wake_thresh)) { + rx_pkts = budget; ++ raw_cons = NEXT_RAW_CMP(raw_cons); ++ break; ++ } + } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { + rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &agg_event); + if (likely(rc >= 0)) +@@ -1685,7 +1688,7 @@ static int bnxt_poll_work(struct bnxt *b + } + raw_cons = NEXT_RAW_CMP(raw_cons); + +- if (rx_pkts == budget) ++ if (rx_pkts && rx_pkts == budget) + break; + } + +@@ -1797,8 +1800,12 @@ static int bnxt_poll(struct napi_struct + while (1) { + work_done += bnxt_poll_work(bp, bnapi, budget - work_done); + +- if (work_done >= budget) ++ if (work_done >= budget) { ++ if (!budget) ++ BNXT_CP_DB_REARM(cpr->cp_doorbell, ++ cpr->cp_raw_cons); + break; ++ } + + if (!bnxt_has_work(bp, cpr)) { + napi_complete(napi); diff --git a/queue-4.9/bonding-avoid-possible-dead-lock.patch b/queue-4.9/bonding-avoid-possible-dead-lock.patch new file mode 100644 index 00000000000..f51c3116a69 --- /dev/null +++ b/queue-4.9/bonding-avoid-possible-dead-lock.patch @@ -0,0 +1,244 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Mahesh Bandewar +Date: Mon, 24 Sep 2018 14:40:11 -0700 +Subject: bonding: avoid possible dead-lock + +From: Mahesh Bandewar + +[ Upstream commit d4859d749aa7090ffb743d15648adb962a1baeae ] + +Syzkaller reported this on a slightly older kernel but it's still +applicable to the current kernel - + +====================================================== +WARNING: possible circular locking dependency detected +4.18.0-next-20180823+ #46 Not tainted +------------------------------------------------------ +syz-executor4/26841 is trying to acquire lock: +00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652 + +but task is already holding lock: +00000000768ab431 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] +00000000768ab431 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 
net/core/rtnetlink.c:4708 + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #2 (rtnl_mutex){+.+.}: + __mutex_lock_common kernel/locking/mutex.c:925 [inline] + __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073 + mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088 + rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77 + bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline] + bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320 + process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153 + worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 + kthread+0x35a/0x420 kernel/kthread.c:246 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 + +-> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}: + process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129 + worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 + kthread+0x35a/0x420 kernel/kthread.c:246 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 + +-> #0 ((wq_completion)bond_dev->name){+.+.}: + lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 + flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 + drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 + destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 + __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 + bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 + register_netdevice+0x337/0x1100 net/core/dev.c:8410 + bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 + rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 + rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 + netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 + rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 + netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] + netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 + netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 + sock_sendmsg_nosec net/socket.c:622 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:632 + 
___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 + __sys_sendmsg+0x11d/0x290 net/socket.c:2153 + __do_sys_sendmsg net/socket.c:2162 [inline] + __se_sys_sendmsg net/socket.c:2160 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +other info that might help us debug this: + +Chain exists of: + (wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(rtnl_mutex); + lock((work_completion)(&(&nnw->work)->work)); + lock(rtnl_mutex); + lock((wq_completion)bond_dev->name); + + *** DEADLOCK *** + +1 lock held by syz-executor4/26841: + +stack backtrace: +CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 + print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222 + check_prev_add kernel/locking/lockdep.c:1862 [inline] + check_prevs_add kernel/locking/lockdep.c:1975 [inline] + validate_chain kernel/locking/lockdep.c:2416 [inline] + __lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412 + lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 + flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 + drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 + destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 + __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 + bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 + register_netdevice+0x337/0x1100 net/core/dev.c:8410 + bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 + rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 + rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 + netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 + rtnetlink_rcv+0x1c/0x20 
net/core/rtnetlink.c:4729 + netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] + netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 + netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 + sock_sendmsg_nosec net/socket.c:622 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:632 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 + __sys_sendmsg+0x11d/0x290 net/socket.c:2153 + __do_sys_sendmsg net/socket.c:2162 [inline] + __se_sys_sendmsg net/socket.c:2160 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x457089 +Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089 +RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 +RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001 + +Signed-off-by: Mahesh Bandewar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 43 +++++++++++++++------------------------- + include/net/bonding.h | 7 ------ + 2 files changed, 18 insertions(+), 32 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -216,6 +216,7 @@ static struct rtnl_link_stats64 *bond_ge + static void bond_slave_arr_handler(struct work_struct *work); + static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, + int mod); ++static void bond_netdev_notify_work(struct work_struct *work); + + /*---------------------------- General routines -----------------------------*/ + +@@ -1250,6 +1251,8 @@ static struct slave *bond_alloc_slave(st + return NULL; + } + } ++ INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); ++ + return slave; + } + +@@ -1257,6 +1260,7 @@ static void bond_free_slave(struct slave + { + struct bonding *bond = bond_get_bond_by_slave(slave); + ++ cancel_delayed_work_sync(&slave->notify_work); + if (BOND_MODE(bond) == BOND_MODE_8023AD) + kfree(SLAVE_AD_INFO(slave)); + +@@ -1278,39 +1282,26 @@ static void bond_fill_ifslave(struct sla + info->link_failure_count = slave->link_failure_count; + } + +-static void bond_netdev_notify(struct net_device *dev, +- struct netdev_bonding_info *info) +-{ +- rtnl_lock(); +- netdev_bonding_info_change(dev, info); +- rtnl_unlock(); +-} +- + static void bond_netdev_notify_work(struct work_struct *_work) + { +- struct netdev_notify_work *w = +- container_of(_work, struct netdev_notify_work, work.work); ++ struct slave *slave = container_of(_work, struct slave, ++ notify_work.work); ++ ++ if (rtnl_trylock()) { ++ struct netdev_bonding_info binfo; + +- bond_netdev_notify(w->dev, &w->bonding_info); +- dev_put(w->dev); +- kfree(w); ++ bond_fill_ifslave(slave, &binfo.slave); ++ bond_fill_ifbond(slave->bond, &binfo.master); ++ netdev_bonding_info_change(slave->dev, &binfo); ++ rtnl_unlock(); ++ } else { ++ 
queue_delayed_work(slave->bond->wq, &slave->notify_work, 1); ++ } + } + + void bond_queue_slave_event(struct slave *slave) + { +- struct bonding *bond = slave->bond; +- struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); +- +- if (!nnw) +- return; +- +- dev_hold(slave->dev); +- nnw->dev = slave->dev; +- bond_fill_ifslave(slave, &nnw->bonding_info.slave); +- bond_fill_ifbond(bond, &nnw->bonding_info.master); +- INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); +- +- queue_delayed_work(slave->bond->wq, &nnw->work, 0); ++ queue_delayed_work(slave->bond->wq, &slave->notify_work, 0); + } + + void bond_lower_state_changed(struct slave *slave) +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -139,12 +139,6 @@ struct bond_parm_tbl { + int mode; + }; + +-struct netdev_notify_work { +- struct delayed_work work; +- struct net_device *dev; +- struct netdev_bonding_info bonding_info; +-}; +- + struct slave { + struct net_device *dev; /* first - useful for panic debug */ + struct bonding *bond; /* our master */ +@@ -171,6 +165,7 @@ struct slave { + #ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *np; + #endif ++ struct delayed_work notify_work; + struct kobject kobj; + struct rtnl_link_stats64 slave_stats; + }; diff --git a/queue-4.9/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch b/queue-4.9/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch new file mode 100644 index 00000000000..a05270b0fef --- /dev/null +++ b/queue-4.9/inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch @@ -0,0 +1,101 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Eric Dumazet +Date: Tue, 2 Oct 2018 12:35:05 -0700 +Subject: inet: make sure to grab rcu_read_lock before using ireq->ireq_opt + +From: Eric Dumazet + +[ Upstream commit 2ab2ddd301a22ca3c5f0b743593e4ad2953dfa53 ] + +Timer handlers do not imply rcu_read_lock(), so my recent fix +triggered a LOCKDEP warning when SYNACK is retransmit. 
+ +Lets add rcu_read_lock()/rcu_read_unlock() pairs around ireq->ireq_opt +usages instead of guessing what is done by callers, since it is +not worth the pain. + +Get rid of ireq_opt_deref() helper since it hides the logic +without real benefit, since it is now a standard rcu_dereference(). + +Fixes: 1ad98e9d1bdf ("tcp/dccp: fix lockdep issue when SYN is backlogged") +Signed-off-by: Eric Dumazet +Reported-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 5 ----- + net/dccp/ipv4.c | 4 +++- + net/ipv4/inet_connection_sock.c | 5 ++++- + net/ipv4/tcp_ipv4.c | 4 +++- + 4 files changed, 10 insertions(+), 8 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -132,11 +132,6 @@ static inline int inet_request_bound_dev + return sk->sk_bound_dev_if; + } + +-static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) +-{ +- return rcu_dereference(ireq->ireq_opt); +-} +- + struct inet_cork { + unsigned int flags; + __be32 addr; +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -493,9 +493,11 @@ static int dccp_v4_send_response(const s + + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, + ireq->ir_rmt_addr); ++ rcu_read_lock(); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq_opt_deref(ireq)); ++ rcu_dereference(ireq->ireq_opt)); ++ rcu_read_unlock(); + err = net_xmit_eval(err); + } + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -410,7 +410,8 @@ struct dst_entry *inet_csk_route_req(con + struct ip_options_rcu *opt; + struct rtable *rt; + +- opt = ireq_opt_deref(ireq); ++ rcu_read_lock(); ++ opt = rcu_dereference(ireq->ireq_opt); + + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, +@@ -424,11 +425,13 @@ struct dst_entry *inet_csk_route_req(con + goto no_route; + if (opt && opt->opt.is_strictroute && 
rt->rt_uses_gateway) + goto route_err; ++ rcu_read_unlock(); + return &rt->dst; + + route_err: + ip_rt_put(rt); + no_route: ++ rcu_read_unlock(); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); + return NULL; + } +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -859,9 +859,11 @@ static int tcp_v4_send_synack(const stru + if (skb) { + __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + ++ rcu_read_lock(); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq_opt_deref(ireq)); ++ rcu_dereference(ireq->ireq_opt)); ++ rcu_read_unlock(); + err = net_xmit_eval(err); + } + diff --git a/queue-4.9/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch b/queue-4.9/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch new file mode 100644 index 00000000000..7cb56a3bcd4 --- /dev/null +++ b/queue-4.9/ip6_tunnel-be-careful-when-accessing-the-inner-header.patch @@ -0,0 +1,136 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Paolo Abeni +Date: Wed, 19 Sep 2018 15:02:07 +0200 +Subject: ip6_tunnel: be careful when accessing the inner header + +From: Paolo Abeni + +[ Upstream commit 76c0ddd8c3a683f6e2c6e60e11dc1a1558caf4bc ] + +the ip6 tunnel xmit ndo assumes that the processed skb always +contains an ip[v6] header, but syzbot has found a way to send +frames that fall short of this assumption, leading to the following splat: + +BUG: KMSAN: uninit-value in ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 +[inline] +BUG: KMSAN: uninit-value in ip6_tnl_start_xmit+0x7d2/0x1ef0 +net/ipv6/ip6_tunnel.c:1390 +CPU: 0 PID: 4504 Comm: syz-executor558 Not tainted 4.16.0+ #87 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:17 [inline] + dump_stack+0x185/0x1d0 lib/dump_stack.c:53 + kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 + __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 + ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] + 
ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 + __netdev_start_xmit include/linux/netdevice.h:4066 [inline] + netdev_start_xmit include/linux/netdevice.h:4075 [inline] + xmit_one net/core/dev.c:3026 [inline] + dev_hard_start_xmit+0x5f1/0xc70 net/core/dev.c:3042 + __dev_queue_xmit+0x27ee/0x3520 net/core/dev.c:3557 + dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590 + packet_snd net/packet/af_packet.c:2944 [inline] + packet_sendmsg+0x7c70/0x8a30 net/packet/af_packet.c:2969 + sock_sendmsg_nosec net/socket.c:630 [inline] + sock_sendmsg net/socket.c:640 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 + __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 + SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 + SyS_sendmmsg+0x63/0x90 net/socket.c:2162 + do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 +RIP: 0033:0x441819 +RSP: 002b:00007ffe58ee8268 EFLAGS: 00000213 ORIG_RAX: 0000000000000133 +RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000441819 +RDX: 0000000000000002 RSI: 0000000020000100 RDI: 0000000000000003 +RBP: 00000000006cd018 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000402510 +R13: 00000000004025a0 R14: 0000000000000000 R15: 0000000000000000 + +Uninit was created at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] + kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 + kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 + kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 + slab_post_alloc_hook mm/slab.h:445 [inline] + slab_alloc_node mm/slub.c:2737 [inline] + __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 + __kmalloc_reserve net/core/skbuff.c:138 [inline] + __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 + alloc_skb include/linux/skbuff.h:984 [inline] + alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234 + sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085 + packet_alloc_skb net/packet/af_packet.c:2803 [inline] + 
packet_snd net/packet/af_packet.c:2894 [inline] + packet_sendmsg+0x6454/0x8a30 net/packet/af_packet.c:2969 + sock_sendmsg_nosec net/socket.c:630 [inline] + sock_sendmsg net/socket.c:640 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 + __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 + SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 + SyS_sendmmsg+0x63/0x90 net/socket.c:2162 + do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + +This change addresses the issue adding the needed check before +accessing the inner header. + +The ipv4 side of the issue is apparently there since the ipv4 over ipv6 +initial support, and the ipv6 side predates git history. + +Fixes: c4d3efafcc93 ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.") +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzbot+3fde91d4d394747d6db4@syzkaller.appspotmail.com +Tested-by: Alexander Potapenko +Signed-off-by: Paolo Abeni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1226,7 +1226,7 @@ static inline int + ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct ip6_tnl *t = netdev_priv(dev); +- const struct iphdr *iph = ip_hdr(skb); ++ const struct iphdr *iph; + int encap_limit = -1; + struct flowi6 fl6; + __u8 dsfield; +@@ -1234,6 +1234,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + u8 tproto; + int err; + ++ /* ensure we can access the full inner ip header */ ++ if (!pskb_may_pull(skb, sizeof(struct iphdr))) ++ return -1; ++ ++ iph = ip_hdr(skb); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + tproto = ACCESS_ONCE(t->parms.proto); +@@ -1293,7 +1298,7 @@ static inline int + ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct ip6_tnl *t = netdev_priv(dev); +- struct ipv6hdr *ipv6h = ipv6_hdr(skb); ++ struct 
ipv6hdr *ipv6h; + int encap_limit = -1; + __u16 offset; + struct flowi6 fl6; +@@ -1302,6 +1307,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + u8 tproto; + int err; + ++ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) ++ return -1; ++ ++ ipv6h = ipv6_hdr(skb); + tproto = ACCESS_ONCE(t->parms.proto); + if ((tproto != IPPROTO_IPV6 && tproto != 0) || + ip6_tnl_addr_conflict(t, ipv6h)) diff --git a/queue-4.9/ip_tunnel-be-careful-when-accessing-the-inner-header.patch b/queue-4.9/ip_tunnel-be-careful-when-accessing-the-inner-header.patch new file mode 100644 index 00000000000..349a6b17eff --- /dev/null +++ b/queue-4.9/ip_tunnel-be-careful-when-accessing-the-inner-header.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Paolo Abeni +Date: Mon, 24 Sep 2018 15:48:19 +0200 +Subject: ip_tunnel: be careful when accessing the inner header + +From: Paolo Abeni + +[ Upstream commit ccfec9e5cb2d48df5a955b7bf47f7782157d3bc2] + +Cong noted that we need the same checks introduced by commit 76c0ddd8c3a6 +("ip6_tunnel: be careful when accessing the inner header") +even for ipv4 tunnels. + +Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") +Suggested-by: Cong Wang +Signed-off-by: Paolo Abeni +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -627,6 +627,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, + const struct iphdr *tnl_params, u8 protocol) + { + struct ip_tunnel *tunnel = netdev_priv(dev); ++ unsigned int inner_nhdr_len = 0; + const struct iphdr *inner_iph; + struct flowi4 fl4; + u8 tos, ttl; +@@ -636,6 +637,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, + __be32 dst; + bool connected; + ++ /* ensure we can access the inner net header, for several users below */ ++ if (skb->protocol == htons(ETH_P_IP)) ++ inner_nhdr_len = sizeof(struct iphdr); ++ else if (skb->protocol == htons(ETH_P_IPV6)) ++ inner_nhdr_len = sizeof(struct ipv6hdr); ++ if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) ++ goto tx_error; ++ + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + connected = (tunnel->parms.iph.daddr != 0); + diff --git a/queue-4.9/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch b/queue-4.9/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch new file mode 100644 index 00000000000..0a24f96f66a --- /dev/null +++ b/queue-4.9/ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch @@ -0,0 +1,42 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Eric Dumazet +Date: Sun, 30 Sep 2018 11:33:39 -0700 +Subject: ipv4: fix use-after-free in ip_cmsg_recv_dstaddr() + +From: Eric Dumazet + +[ Upstream commit 64199fc0a46ba211362472f7f942f900af9492fd ] + +Caching ip_hdr(skb) before a call to pskb_may_pull() is buggy, +do not do it. + +Fixes: 2efd4fca703a ("ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Reported-by: syzbot +Acked-by: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_sockglue.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -134,7 +134,6 @@ static void ip_cmsg_recv_security(struct + static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) + { + struct sockaddr_in sin; +- const struct iphdr *iph = ip_hdr(skb); + __be16 *ports; + int end; + +@@ -149,7 +148,7 @@ static void ip_cmsg_recv_dstaddr(struct + ports = (__be16 *)skb_transport_header(skb); + + sin.sin_family = AF_INET; +- sin.sin_addr.s_addr = iph->daddr; ++ sin.sin_addr.s_addr = ip_hdr(skb)->daddr; + sin.sin_port = ports[1]; + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + diff --git a/queue-4.9/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch b/queue-4.9/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch new file mode 100644 index 00000000000..bc31cb2fbf1 --- /dev/null +++ b/queue-4.9/ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch @@ -0,0 +1,169 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Wei Wang +Date: Thu, 4 Oct 2018 10:12:37 -0700 +Subject: ipv6: take rcu lock in rawv6_send_hdrinc() + +From: Wei Wang + +[ Upstream commit a688caa34beb2fd2a92f1b6d33e40cde433ba160 ] + +In rawv6_send_hdrinc(), in order to avoid an extra dst_hold(), we +directly assign the dst to skb and set the passed-in dst to NULL to avoid +double free. +However, in the error case, we free skb and then do stats update with the +dst pointer passed in. This causes use-after-free on the dst. +Fix it by taking rcu read lock right before dst could get released to +make sure dst does not get freed until the stats update is done. +Note: we don't have this issue in ipv4 because dst is not used for stats +update in v4. 
+ +Syzkaller reported following crash: +BUG: KASAN: use-after-free in rawv6_send_hdrinc net/ipv6/raw.c:692 [inline] +BUG: KASAN: use-after-free in rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921 +Read of size 8 at addr ffff8801d95ba730 by task syz-executor0/32088 + +CPU: 1 PID: 32088 Comm: syz-executor0 Not tainted 4.19.0-rc2+ #93 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 + print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 + rawv6_send_hdrinc net/ipv6/raw.c:692 [inline] + rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921 + inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:631 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 + __sys_sendmsg+0x11d/0x280 net/socket.c:2152 + __do_sys_sendmsg net/socket.c:2161 [inline] + __se_sys_sendmsg net/socket.c:2159 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x457099 +Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f83756edc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f83756ee6d4 RCX: 0000000000457099 +RDX: 0000000000000000 RSI: 0000000020003840 RDI: 0000000000000004 +RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 00000000004d4b30 R14: 00000000004c90b1 R15: 0000000000000000 + +Allocated by task 32088: + 
save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 + kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 + kmem_cache_alloc+0x12e/0x730 mm/slab.c:3554 + dst_alloc+0xbb/0x1d0 net/core/dst.c:105 + ip6_dst_alloc+0x35/0xa0 net/ipv6/route.c:353 + ip6_rt_cache_alloc+0x247/0x7b0 net/ipv6/route.c:1186 + ip6_pol_route+0x8f8/0xd90 net/ipv6/route.c:1895 + ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2093 + fib6_rule_lookup+0x277/0x860 net/ipv6/fib6_rules.c:122 + ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2121 + ip6_route_output include/net/ip6_route.h:88 [inline] + ip6_dst_lookup_tail+0xe27/0x1d60 net/ipv6/ip6_output.c:951 + ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079 + rawv6_sendmsg+0x12d9/0x4630 net/ipv6/raw.c:905 + inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:631 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 + __sys_sendmsg+0x11d/0x280 net/socket.c:2152 + __do_sys_sendmsg net/socket.c:2161 [inline] + __se_sys_sendmsg net/socket.c:2159 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Freed by task 5356: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 + kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 + __cache_free mm/slab.c:3498 [inline] + kmem_cache_free+0x83/0x290 mm/slab.c:3756 + dst_destroy+0x267/0x3c0 net/core/dst.c:141 + dst_destroy_rcu+0x16/0x19 net/core/dst.c:154 + __rcu_reclaim kernel/rcu/rcu.h:236 [inline] + rcu_do_batch kernel/rcu/tree.c:2576 [inline] + invoke_rcu_callbacks kernel/rcu/tree.c:2880 [inline] + __rcu_process_callbacks kernel/rcu/tree.c:2847 [inline] + rcu_process_callbacks+0xf23/0x2670 kernel/rcu/tree.c:2864 + __do_softirq+0x30b/0xad8 kernel/softirq.c:292 + +Fixes: 
1789a640f556 ("raw: avoid two atomics in xmit") +Signed-off-by: Wei Wang +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/raw.c | 29 ++++++++++++++++++++--------- + 1 file changed, 20 insertions(+), 9 deletions(-) + +--- a/net/ipv6/raw.c ++++ b/net/ipv6/raw.c +@@ -645,8 +645,6 @@ static int rawv6_send_hdrinc(struct sock + skb->protocol = htons(ETH_P_IPV6); + skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; +- skb_dst_set(skb, &rt->dst); +- *dstp = NULL; + + skb_put(skb, length); + skb_reset_network_header(skb); +@@ -656,8 +654,14 @@ static int rawv6_send_hdrinc(struct sock + + skb->transport_header = skb->network_header; + err = memcpy_from_msg(iph, msg, length); +- if (err) +- goto error_fault; ++ if (err) { ++ err = -EFAULT; ++ kfree_skb(skb); ++ goto error; ++ } ++ ++ skb_dst_set(skb, &rt->dst); ++ *dstp = NULL; + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing +@@ -666,21 +670,28 @@ static int rawv6_send_hdrinc(struct sock + if (unlikely(!skb)) + return 0; + ++ /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev ++ * in the error path. Since skb has been freed, the dst could ++ * have been queued for deletion. 
++ */ ++ rcu_read_lock(); + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, rt->dst.dev, dst_output); + if (err > 0) + err = net_xmit_errno(err); +- if (err) +- goto error; ++ if (err) { ++ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); ++ rcu_read_unlock(); ++ goto error_check; ++ } ++ rcu_read_unlock(); + out: + return 0; + +-error_fault: +- err = -EFAULT; +- kfree_skb(skb); + error: + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); ++error_check: + if (err == -ENOBUFS && !np->recverr) + err = 0; + return err; diff --git a/queue-4.9/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch b/queue-4.9/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch new file mode 100644 index 00000000000..4b059da2720 --- /dev/null +++ b/queue-4.9/net-dsa-bcm_sf2-call-setup-during-switch-resume.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Florian Fainelli +Date: Tue, 9 Oct 2018 16:48:58 -0700 +Subject: net: dsa: bcm_sf2: Call setup during switch resume + +From: Florian Fainelli + +[ Upstream commit 54baca096386d862d19c10f58f34bf787c6b3cbe ] + +There is no reason to open code what the switch setup function does, in +fact, because we just issued a switch reset, we would make all the +register get their default values, including for instance, having unused +port be enabled again and wasting power and leading to an inappropriate +switch core clock being selected. + +Fixes: 8cfa94984c9c ("net: dsa: bcm_sf2: add suspend/resume callbacks") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -744,7 +744,6 @@ static int bcm_sf2_sw_suspend(struct dsa + static int bcm_sf2_sw_resume(struct dsa_switch *ds) + { + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); +- unsigned int port; + int ret; + + ret = bcm_sf2_sw_rst(priv); +@@ -756,12 +755,7 @@ static int bcm_sf2_sw_resume(struct dsa_ + if (priv->hw_params.num_gphy == 1) + bcm_sf2_gphy_enable_set(ds, true); + +- for (port = 0; port < DSA_MAX_PORTS; port++) { +- if ((1 << port) & ds->enabled_port_mask) +- bcm_sf2_port_setup(ds, port, NULL); +- else if (dsa_is_cpu_port(ds, port)) +- bcm_sf2_imp_setup(ds, port); +- } ++ ds->ops->setup(ds); + + return 0; + } diff --git a/queue-4.9/net-dsa-bcm_sf2-fix-unbind-ordering.patch b/queue-4.9/net-dsa-bcm_sf2-fix-unbind-ordering.patch new file mode 100644 index 00000000000..3323b0fdd33 --- /dev/null +++ b/queue-4.9/net-dsa-bcm_sf2-fix-unbind-ordering.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Florian Fainelli +Date: Tue, 9 Oct 2018 16:48:57 -0700 +Subject: net: dsa: bcm_sf2: Fix unbind ordering + +From: Florian Fainelli + +[ Upstream commit bf3b452b7af787b8bf27de6490dc4eedf6f97599 ] + +The order in which we release resources is unfortunately leading to bus +errors while dismantling the port. This is because we set +priv->wol_ports_mask to 0 to tell bcm_sf2_sw_suspend() that it is now +permissible to clock gate the switch. Later on, when dsa_slave_destroy() +comes in from dsa_unregister_switch() and calls +dsa_switch_ops::port_disable, we perform the same dismantling again, and +this time we hit registers that are clock gated. + +Make sure that dsa_unregister_switch() is the first thing that happens, +which takes care of releasing all user visible resources, then proceed +with clock gating hardware. 
We still need to set priv->wol_ports_mask to +0 to make sure that an enabled port properly gets disabled in case it +was previously used as part of Wake-on-LAN. + +Fixes: d9338023fb8e ("net: dsa: bcm_sf2: Make it a real platform device driver") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -1129,10 +1129,10 @@ static int bcm_sf2_sw_remove(struct plat + { + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + +- /* Disable all ports and interrupts */ + priv->wol_ports_mask = 0; +- bcm_sf2_sw_suspend(priv->dev->ds); + dsa_unregister_switch(priv->dev->ds); ++ /* Disable all ports and interrupts */ ++ bcm_sf2_sw_suspend(priv->dev->ds); + bcm_sf2_mdio_unregister(priv); + + return 0; diff --git a/queue-4.9/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch b/queue-4.9/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch new file mode 100644 index 00000000000..82c228e6a18 --- /dev/null +++ b/queue-4.9/net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch @@ -0,0 +1,102 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Yunsheng Lin +Date: Tue, 25 Sep 2018 10:21:55 +0100 +Subject: net: hns: fix for unmapping problem when SMMU is on + +From: Yunsheng Lin + +[ Upstream commit 2e9361efa707e186d91b938e44f9e326725259f7 ] + +If SMMU is on, it is more likely that skb_shinfo(skb)->frags[i] +cannot be sent by a single BD. When this happens, the +hns_nic_net_xmit_hw function maps the whole data in a frag using +skb_frag_dma_map, but unmaps each BD's data individually when tx is +done, which causes problems when SMMU is on. + +This patch fixes this problem by unmapping the whole data in a +frag when tx is done. + +Signed-off-by: Yunsheng Lin +Signed-off-by: Peng Li +Reviewed-by: Yisen Zhuang +Signed-off-by: Salil Mehta +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/hisilicon/hns/hnae.c | 2 - + drivers/net/ethernet/hisilicon/hns/hns_enet.c | 30 ++++++++++++++++---------- + 2 files changed, 20 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/hisilicon/hns/hnae.c ++++ b/drivers/net/ethernet/hisilicon/hns/hnae.c +@@ -80,7 +80,7 @@ static void hnae_unmap_buffer(struct hna + if (cb->type == DESC_TYPE_SKB) + dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, + ring_to_dma_dir(ring)); +- else ++ else if (cb->length) + dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, + ring_to_dma_dir(ring)); + } +--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c +@@ -39,9 +39,9 @@ + #define SKB_TMP_LEN(SKB) \ + (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB)) + +-static void fill_v2_desc(struct hnae_ring *ring, void *priv, +- int size, dma_addr_t dma, int frag_end, +- int buf_num, enum hns_desc_type type, int mtu) ++static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size, ++ int send_sz, dma_addr_t dma, int frag_end, ++ int buf_num, enum hns_desc_type type, int mtu) + { + struct hnae_desc *desc = &ring->desc[ring->next_to_use]; + struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; +@@ -63,7 +63,7 @@ static void fill_v2_desc(struct hnae_rin + desc_cb->type = type; + + desc->addr = cpu_to_le64(dma); +- desc->tx.send_size = cpu_to_le16((u16)size); ++ desc->tx.send_size = cpu_to_le16((u16)send_sz); + + /* config bd buffer end */ + hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); +@@ -132,6 +132,14 @@ static void fill_v2_desc(struct hnae_rin + ring_ptr_move_fw(ring, next_to_use); + } + ++static void fill_v2_desc(struct hnae_ring *ring, void *priv, ++ int size, dma_addr_t dma, int frag_end, ++ int buf_num, enum hns_desc_type type, int mtu) ++{ ++ fill_v2_desc_hw(ring, priv, size, size, dma, frag_end, ++ buf_num, type, mtu); ++} ++ + static const struct acpi_device_id 
hns_enet_acpi_match[] = { + { "HISI00C1", 0 }, + { "HISI00C2", 0 }, +@@ -288,15 +296,15 @@ static void fill_tso_desc(struct hnae_ri + + /* when the frag size is bigger than hardware, split this frag */ + for (k = 0; k < frag_buf_num; k++) +- fill_v2_desc(ring, priv, +- (k == frag_buf_num - 1) ? ++ fill_v2_desc_hw(ring, priv, k == 0 ? size : 0, ++ (k == frag_buf_num - 1) ? + sizeoflast : BD_MAX_SEND_SIZE, +- dma + BD_MAX_SEND_SIZE * k, +- frag_end && (k == frag_buf_num - 1) ? 1 : 0, +- buf_num, +- (type == DESC_TYPE_SKB && !k) ? ++ dma + BD_MAX_SEND_SIZE * k, ++ frag_end && (k == frag_buf_num - 1) ? 1 : 0, ++ buf_num, ++ (type == DESC_TYPE_SKB && !k) ? + DESC_TYPE_SKB : DESC_TYPE_PAGE, +- mtu); ++ mtu); + } + + netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, diff --git a/queue-4.9/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch b/queue-4.9/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch new file mode 100644 index 00000000000..ac16da14bbb --- /dev/null +++ b/queue-4.9/net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch @@ -0,0 +1,218 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Sabrina Dubroca +Date: Tue, 9 Oct 2018 17:48:14 +0200 +Subject: net: ipv4: update fnhe_pmtu when first hop's MTU changes + +From: Sabrina Dubroca + +[ Upstream commit af7d6cce53694a88d6a1bb60c9a239a6a5144459 ] + +Since commit 5aad1de5ea2c ("ipv4: use separate genid for next hop +exceptions"), exceptions get deprecated separately from cached +routes. In particular, administrative changes don't clear PMTU anymore. 
+ +As Stefano described in commit e9fa1495d738 ("ipv6: Reflect MTU changes +on PMTU of exceptions for MTU-less routes"), the PMTU discovered before +the local MTU change can become stale: + - if the local MTU is now lower than the PMTU, that PMTU is now + incorrect + - if the local MTU was the lowest value in the path, and is increased, + we might discover a higher PMTU + +Similarly to what commit e9fa1495d738 did for IPv6, update PMTU in those +cases. + +If the exception was locked, the discovered PMTU was smaller than the +minimal accepted PMTU. In that case, if the new local MTU is smaller +than the current PMTU, let PMTU discovery figure out if locking of the +exception is still needed. + +To do this, we need to know the old link MTU in the NETDEV_CHANGEMTU +notifier. By the time the notifier is called, dev->mtu has been +changed. This patch adds the old MTU as additional information in the +notifier structure, and a new call_netdevice_notifiers_mtu() function. + +Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Stefano Brivio +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 7 ++++++ + include/net/ip_fib.h | 1 + net/core/dev.c | 28 +++++++++++++++++++++++-- + net/ipv4/fib_frontend.c | 12 +++++++---- + net/ipv4/fib_semantics.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++ + 5 files changed, 92 insertions(+), 6 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -2338,6 +2338,13 @@ struct netdev_notifier_info { + struct net_device *dev; + }; + ++struct netdev_notifier_info_ext { ++ struct netdev_notifier_info info; /* must be first */ ++ union { ++ u32 mtu; ++ } ext; ++}; ++ + struct netdev_notifier_change_info { + struct netdev_notifier_info info; /* must be first */ + unsigned int flags_changed; +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -363,6 +363,7 @@ int ip_fib_check_default(__be32 gw, stru + int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); + int fib_sync_down_addr(struct net_device *dev, __be32 local); + int fib_sync_up(struct net_device *dev, unsigned int nh_flags); ++void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); + + extern u32 fib_multipath_secret __read_mostly; + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1664,6 +1664,28 @@ int call_netdevice_notifiers(unsigned lo + } + EXPORT_SYMBOL(call_netdevice_notifiers); + ++/** ++ * call_netdevice_notifiers_mtu - call all network notifier blocks ++ * @val: value passed unmodified to notifier function ++ * @dev: net_device pointer passed unmodified to notifier function ++ * @arg: additional u32 argument passed to the notifier function ++ * ++ * Call all network notifier blocks. Parameters and return value ++ * are as for raw_notifier_call_chain(). 
++ */ ++static int call_netdevice_notifiers_mtu(unsigned long val, ++ struct net_device *dev, u32 arg) ++{ ++ struct netdev_notifier_info_ext info = { ++ .info.dev = dev, ++ .ext.mtu = arg, ++ }; ++ ++ BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); ++ ++ return call_netdevice_notifiers_info(val, dev, &info.info); ++} ++ + #ifdef CONFIG_NET_INGRESS + static struct static_key ingress_needed __read_mostly; + +@@ -6589,14 +6611,16 @@ int dev_set_mtu(struct net_device *dev, + err = __dev_set_mtu(dev, new_mtu); + + if (!err) { +- err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); ++ err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, ++ orig_mtu); + err = notifier_to_errno(err); + if (err) { + /* setting mtu back and notifying everyone again, + * so that they have a chance to revert changes. + */ + __dev_set_mtu(dev, orig_mtu); +- call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); ++ call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, ++ new_mtu); + } + } + return err; +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -1171,7 +1171,8 @@ static int fib_inetaddr_event(struct not + static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) + { + struct net_device *dev = netdev_notifier_info_to_dev(ptr); +- struct netdev_notifier_changeupper_info *info; ++ struct netdev_notifier_changeupper_info *upper_info = ptr; ++ struct netdev_notifier_info_ext *info_ext = ptr; + struct in_device *in_dev; + struct net *net = dev_net(dev); + unsigned int flags; +@@ -1206,16 +1207,19 @@ static int fib_netdev_event(struct notif + fib_sync_up(dev, RTNH_F_LINKDOWN); + else + fib_sync_down_dev(dev, event, false); +- /* fall through */ ++ rt_cache_flush(net); ++ break; + case NETDEV_CHANGEMTU: ++ fib_sync_mtu(dev, info_ext->ext.mtu); + rt_cache_flush(net); + break; + case NETDEV_CHANGEUPPER: +- info = ptr; ++ upper_info = ptr; + /* flush all routes if dev is linked to or unlinked from + * an L3 master device (e.g., VRF) 
+ */ +- if (info->upper_dev && netif_is_l3_master(info->upper_dev)) ++ if (upper_info->upper_dev && ++ netif_is_l3_master(upper_info->upper_dev)) + fib_disable_ip(dev, NETDEV_DOWN, true); + break; + } +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1377,6 +1377,56 @@ int fib_sync_down_addr(struct net_device + return ret; + } + ++/* Update the PMTU of exceptions when: ++ * - the new MTU of the first hop becomes smaller than the PMTU ++ * - the old MTU was the same as the PMTU, and it limited discovery of ++ * larger MTUs on the path. With that limit raised, we can now ++ * discover larger MTUs ++ * A special case is locked exceptions, for which the PMTU is smaller ++ * than the minimal accepted PMTU: ++ * - if the new MTU is greater than the PMTU, don't make any change ++ * - otherwise, unlock and set PMTU ++ */ ++static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) ++{ ++ struct fnhe_hash_bucket *bucket; ++ int i; ++ ++ bucket = rcu_dereference_protected(nh->nh_exceptions, 1); ++ if (!bucket) ++ return; ++ ++ for (i = 0; i < FNHE_HASH_SIZE; i++) { ++ struct fib_nh_exception *fnhe; ++ ++ for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); ++ fnhe; ++ fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { ++ if (fnhe->fnhe_mtu_locked) { ++ if (new <= fnhe->fnhe_pmtu) { ++ fnhe->fnhe_pmtu = new; ++ fnhe->fnhe_mtu_locked = false; ++ } ++ } else if (new < fnhe->fnhe_pmtu || ++ orig == fnhe->fnhe_pmtu) { ++ fnhe->fnhe_pmtu = new; ++ } ++ } ++ } ++} ++ ++void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) ++{ ++ unsigned int hash = fib_devindex_hashfn(dev->ifindex); ++ struct hlist_head *head = &fib_info_devhash[hash]; ++ struct fib_nh *nh; ++ ++ hlist_for_each_entry(nh, head, nh_hash) { ++ if (nh->nh_dev == dev) ++ nh_update_mtu(nh, dev->mtu, orig_mtu); ++ } ++} ++ + /* Event force Flags Description + * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host + * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host diff 
--git a/queue-4.9/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch b/queue-4.9/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch new file mode 100644 index 00000000000..1062504ab48 --- /dev/null +++ b/queue-4.9/net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch @@ -0,0 +1,61 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Jeff Barnhill <0xeffeff@gmail.com> +Date: Fri, 21 Sep 2018 00:45:27 +0000 +Subject: net/ipv6: Display all addresses in output of /proc/net/if_inet6 + +From: Jeff Barnhill <0xeffeff@gmail.com> + +[ Upstream commit 86f9bd1ff61c413a2a251fa736463295e4e24733 ] + +The backend handling for /proc/net/if_inet6 in addrconf.c doesn't properly +handle starting/stopping the iteration. The problem is that at some point +during the iteration, an overflow is detected and the process is +subsequently stopped. The item being shown via seq_printf() when the +overflow occurs is not actually shown, though. When start() is +subsequently called to resume iterating, it returns the next item, and +thus the item that was being processed when the overflow occurred never +gets printed. + +Alter the meaning of the private data member "offset". Currently, when it +is not 0 (which only happens at the very beginning), "offset" represents +the next hlist item to be printed. After this change, "offset" always +represents the current item. + +This is also consistent with the private data member "bucket", which +represents the current bucket, and also the use of "pos" as defined in +seq_file.txt: + The pos passed to start() will always be either zero, or the most + recent pos used in the previous session. + +Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -4068,7 +4068,6 @@ static struct inet6_ifaddr *if6_get_firs + p++; + continue; + } +- state->offset++; + return ifa; + } + +@@ -4092,13 +4091,12 @@ static struct inet6_ifaddr *if6_get_next + return ifa; + } + ++ state->offset = 0; + while (++state->bucket < IN6_ADDR_HSIZE) { +- state->offset = 0; + hlist_for_each_entry_rcu_bh(ifa, + &inet6_addr_lst[state->bucket], addr_lst) { + if (!net_eq(dev_net(ifa->idev->dev), net)) + continue; +- state->offset++; + return ifa; + } + } diff --git a/queue-4.9/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch b/queue-4.9/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch new file mode 100644 index 00000000000..90217f689d8 --- /dev/null +++ b/queue-4.9/net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch @@ -0,0 +1,74 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Maxime Chevallier +Date: Fri, 5 Oct 2018 09:04:40 +0200 +Subject: net: mvpp2: Extract the correct ethtype from the skb for tx csum offload + +From: Maxime Chevallier + +[ Upstream commit 35f3625c21852ad839f20c91c7d81c4c1101e207 ] + +When offloading the L3 and L4 csum computation on TX, we need to extract +the l3_proto from the ethtype, independently of the presence of a vlan +tag. + +The actual driver uses skb->protocol as-is, resulting in packets with +the wrong L4 checksum being sent when there's a vlan tag in the packet +header and checksum offloading is enabled. + +This commit makes use of vlan_get_protocol() to get the correct ethtype +regardless of the presence of a vlan tag. + +Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") +Signed-off-by: Maxime Chevallier +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvpp2.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvpp2.c ++++ b/drivers/net/ethernet/marvell/mvpp2.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -4266,7 +4267,7 @@ static void mvpp2_txq_desc_put(struct mv + } + + /* Set Tx descriptors fields relevant for CSUM calculation */ +-static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto, ++static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto, + int ip_hdr_len, int l4_proto) + { + u32 command; +@@ -5019,14 +5020,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp + if (skb->ip_summed == CHECKSUM_PARTIAL) { + int ip_hdr_len = 0; + u8 l4_proto; ++ __be16 l3_proto = vlan_get_protocol(skb); + +- if (skb->protocol == htons(ETH_P_IP)) { ++ if (l3_proto == htons(ETH_P_IP)) { + struct iphdr *ip4h = ip_hdr(skb); + + /* Calculate IPv4 checksum and L4 checksum */ + ip_hdr_len = ip4h->ihl; + l4_proto = ip4h->protocol; +- } else if (skb->protocol == htons(ETH_P_IPV6)) { ++ } else if (l3_proto == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = ipv6_hdr(skb); + + /* Read l4_protocol from one of IPv6 extra headers */ +@@ -5038,7 +5040,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp + } + + return mvpp2_txq_desc_csum(skb_network_offset(skb), +- skb->protocol, ip_hdr_len, l4_proto); ++ l3_proto, ip_hdr_len, l4_proto); + } + + return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE; diff --git a/queue-4.9/net-systemport-fix-wake-up-interrupt-race-during-resume.patch b/queue-4.9/net-systemport-fix-wake-up-interrupt-race-during-resume.patch new file mode 100644 index 00000000000..c26e3960f7f --- /dev/null +++ b/queue-4.9/net-systemport-fix-wake-up-interrupt-race-during-resume.patch @@ -0,0 +1,90 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Florian Fainelli +Date: Tue, 2 Oct 2018 16:52:03 -0700 +Subject: net: systemport: Fix wake-up 
interrupt race during resume + +From: Florian Fainelli + +[ Upstream commit 45ec318578c0c22a11f5b9927d064418e1ab1905 ] + +The AON_PM_L2 is normally used to trigger and identify the source of a +wake-up event. Since the RX_SYS clock is no longer turned off, we also +have an interrupt being sent to the SYSTEMPORT INTRL_2_0 controller, and +that interrupt remains active up until the magic packet detector is +disabled which happens much later during the driver resumption. + +The race happens if we have a CPU that is entering the SYSTEMPORT +INTRL2_0 handler during resume, and another CPU has managed to clear the +wake-up interrupt during bcm_sysport_resume_from_wol(). In that case, we +have the first CPU stuck in the interrupt handler with an interrupt +cause that has been cleared under its feet, and so we keep returning +IRQ_NONE and we never make any progress. + +This was not a problem before because we would always turn off the +RX_SYS clock during WoL, so the SYSTEMPORT INTRL2_0 would also be turned +off as well, thus not latching the interrupt. + +The fix is to make sure we do not enable either the MPD or +BRCM_TAG_MATCH interrupts since those are redundant with what the +AON_PM_L2 interrupt controller already processes and they would cause +such a race to occur. + +Fixes: bb9051a2b230 ("net: systemport: Add support for WAKE_FILTER") +Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bcmsysport.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bcmsysport.c ++++ b/drivers/net/ethernet/broadcom/bcmsysport.c +@@ -828,14 +828,22 @@ static void bcm_sysport_resume_from_wol( + { + u32 reg; + +- /* Stop monitoring MPD interrupt */ +- intrl2_0_mask_set(priv, INTRL2_0_MPD); +- + /* Clear the MagicPacket detection logic */ + reg = umac_readl(priv, UMAC_MPD_CTRL); + reg &= ~MPD_EN; + umac_writel(priv, reg, UMAC_MPD_CTRL); + ++ reg = intrl2_0_readl(priv, INTRL2_CPU_STATUS); ++ if (reg & INTRL2_0_MPD) ++ netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); ++ ++ if (reg & INTRL2_0_BRCM_MATCH_TAG) { ++ reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & ++ RXCHK_BRCM_TAG_MATCH_MASK; ++ netdev_info(priv->netdev, ++ "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); ++ } ++ + netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n"); + } + +@@ -868,11 +876,6 @@ static irqreturn_t bcm_sysport_rx_isr(in + if (priv->irq0_stat & INTRL2_0_TX_RING_FULL) + bcm_sysport_tx_reclaim_all(priv); + +- if (priv->irq0_stat & INTRL2_0_MPD) { +- netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n"); +- bcm_sysport_resume_from_wol(priv); +- } +- + return IRQ_HANDLED; + } + +@@ -1901,9 +1904,6 @@ static int bcm_sysport_suspend_to_wol(st + /* UniMAC receive needs to be turned on */ + umac_enable_set(priv, CMD_RX_EN, 1); + +- /* Enable the interrupt wake-up source */ +- intrl2_0_mask_clear(priv, INTRL2_0_MPD); +- + netif_dbg(priv, wol, ndev, "entered WOL mode\n"); + + return 0; diff --git a/queue-4.9/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch b/queue-4.9/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch new file mode 100644 index 00000000000..6b7810990ed --- /dev/null +++ b/queue-4.9/net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 
+From: Yu Zhao +Date: Fri, 28 Sep 2018 17:04:30 -0600 +Subject: net/usb: cancel pending work when unbinding smsc75xx + +From: Yu Zhao + +[ Upstream commit f7b2a56e1f3dcbdb4cf09b2b63e859ffe0e09df8 ] + +Cancel pending work before freeing smsc75xx private data structure +during unbinding. This fixes the following crash in the driver: + +BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 +IP: mutex_lock+0x2b/0x3f + +Workqueue: events smsc75xx_deferred_multicast_write [smsc75xx] +task: ffff8caa83e85700 task.stack: ffff948b80518000 +RIP: 0010:mutex_lock+0x2b/0x3f + +Call Trace: + smsc75xx_deferred_multicast_write+0x40/0x1af [smsc75xx] + process_one_work+0x18d/0x2fc + worker_thread+0x1a2/0x269 + ? pr_cont_work+0x58/0x58 + kthread+0xfa/0x10a + ? pr_cont_work+0x58/0x58 + ? rcu_read_unlock_sched_notrace+0x48/0x48 + ret_from_fork+0x22/0x40 + +Signed-off-by: Yu Zhao +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/smsc75xx.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/smsc75xx.c ++++ b/drivers/net/usb/smsc75xx.c +@@ -1518,6 +1518,7 @@ static void smsc75xx_unbind(struct usbne + { + struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); + if (pdata) { ++ cancel_work_sync(&pdata->set_multicast); + netif_dbg(dev, ifdown, dev->net, "free pdata\n"); + kfree(pdata); + pdata = NULL; diff --git a/queue-4.9/netlabel-check-for-ipv4mask-in-addrinfo_get.patch b/queue-4.9/netlabel-check-for-ipv4mask-in-addrinfo_get.patch new file mode 100644 index 00000000000..7d1cc7fb60f --- /dev/null +++ b/queue-4.9/netlabel-check-for-ipv4mask-in-addrinfo_get.patch @@ -0,0 +1,61 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Sean Tranchetti +Date: Thu, 20 Sep 2018 14:29:45 -0600 +Subject: netlabel: check for IPV4MASK in addrinfo_get + +From: Sean Tranchetti + +[ Upstream commit f88b4c01b97e09535505cf3c327fdbce55c27f00 ] + +netlbl_unlabel_addrinfo_get() assumes that if it finds the 
+NLBL_UNLABEL_A_IPV4ADDR attribute, it must also have the +NLBL_UNLABEL_A_IPV4MASK attribute as well. However, this is +not necessarily the case as the current checks in +netlbl_unlabel_staticadd() and friends are not sufficient to +enforce this. + +If passed a netlink message with NLBL_UNLABEL_A_IPV4ADDR, +NLBL_UNLABEL_A_IPV6ADDR, and NLBL_UNLABEL_A_IPV6MASK attributes, +these functions will all call netlbl_unlabel_addrinfo_get() which +will then attempt to dereference NULL when fetching the non-existent +NLBL_UNLABEL_A_IPV4MASK attribute: + +Unable to handle kernel NULL pointer dereference at virtual address 0 +Process unlab (pid: 31762, stack limit = 0xffffff80502d8000) +Call trace: + netlbl_unlabel_addrinfo_get+0x44/0xd8 + netlbl_unlabel_staticremovedef+0x98/0xe0 + genl_rcv_msg+0x354/0x388 + netlink_rcv_skb+0xac/0x118 + genl_rcv+0x34/0x48 + netlink_unicast+0x158/0x1f0 + netlink_sendmsg+0x32c/0x338 + sock_sendmsg+0x44/0x60 + ___sys_sendmsg+0x1d0/0x2a8 + __sys_sendmsg+0x64/0xb4 + SyS_sendmsg+0x34/0x4c + el0_svc_naked+0x34/0x38 +Code: 51001149 7100113f 540000a0 f9401508 (79400108) +---[ end trace f6438a488e737143 ]--- +Kernel panic - not syncing: Fatal exception + +Signed-off-by: Sean Tranchetti + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlabel/netlabel_unlabeled.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/netlabel/netlabel_unlabeled.c ++++ b/net/netlabel/netlabel_unlabeled.c +@@ -787,7 +787,8 @@ static int netlbl_unlabel_addrinfo_get(s + { + u32 addr_len; + +- if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) { ++ if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && ++ info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { + addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); + if (addr_len != sizeof(struct in_addr) && + addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) diff --git a/queue-4.9/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch b/queue-4.9/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch new file mode 100644 index 00000000000..579a684fdf9 --- /dev/null +++ b/queue-4.9/qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch @@ -0,0 +1,149 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Shahed Shaikh +Date: Wed, 26 Sep 2018 12:41:10 -0700 +Subject: qlcnic: fix Tx descriptor corruption on 82xx devices + +From: Shahed Shaikh + +[ Upstream commit c333fa0c4f220f8f7ea5acd6b0ebf3bf13fd684d ] + +In regular NIC transmission flow, driver always configures MAC using +Tx queue zero descriptor as a part of MAC learning flow. +But with multi Tx queue supported NIC, regular transmission can occur on +any non-zero Tx queue and from that context it uses +Tx queue zero descriptor to configure MAC, at the same time TX queue +zero could be used by another CPU for regular transmission +which could lead to Tx queue zero descriptor corruption and cause FW +abort. + +This patch fixes this in such a way that driver always configures +learned MAC address from the same Tx queue which is used for +regular transmission. + +Fixes: 7e2cf4feba05 ("qlcnic: change driver hardware interface mechanism") +Signed-off-by: Shahed Shaikh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 8 +++++--- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 3 ++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h | 3 ++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h | 3 ++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 12 ++++++------ + 5 files changed, 17 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +@@ -1800,7 +1800,8 @@ struct qlcnic_hardware_ops { + int (*config_loopback) (struct qlcnic_adapter *, u8); + int (*clear_loopback) (struct qlcnic_adapter *, u8); + int (*config_promisc_mode) (struct qlcnic_adapter *, u32); +- void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16); ++ void (*change_l2_filter)(struct qlcnic_adapter *adapter, u64 *addr, ++ u16 vlan, struct qlcnic_host_tx_ring *tx_ring); + int (*get_board_info) (struct qlcnic_adapter *); + void (*set_mac_filter_count) (struct qlcnic_adapter *); + void (*free_mac_list) (struct qlcnic_adapter *); +@@ -2042,9 +2043,10 @@ static inline int qlcnic_nic_set_promisc + } + + static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter, +- u64 *addr, u16 id) ++ u64 *addr, u16 vlan, ++ struct qlcnic_host_tx_ring *tx_ring) + { +- adapter->ahw->hw_ops->change_l2_filter(adapter, addr, id); ++ adapter->ahw->hw_ops->change_l2_filter(adapter, addr, vlan, tx_ring); + } + + static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter) +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +@@ -2132,7 +2132,8 @@ out: + } + + void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, +- u16 vlan_id) ++ u16 vlan_id, ++ struct qlcnic_host_tx_ring *tx_ring) + { + u8 mac[ETH_ALEN]; + memcpy(&mac, addr, ETH_ALEN); +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h ++++ 
b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +@@ -550,7 +550,8 @@ int qlcnic_83xx_wrt_reg_indirect(struct + int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32); + int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int); + int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int); +-void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16); ++void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, ++ u16 vlan, struct qlcnic_host_tx_ring *ring); + int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *); + int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *); + void qlcnic_83xx_initialize_nic(struct qlcnic_adapter *, int); +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h +@@ -173,7 +173,8 @@ int qlcnic_82xx_napi_add(struct qlcnic_a + struct net_device *netdev); + void qlcnic_82xx_get_beacon_state(struct qlcnic_adapter *); + void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, +- u64 *uaddr, u16 vlan_id); ++ u64 *uaddr, u16 vlan_id, ++ struct qlcnic_host_tx_ring *tx_ring); + int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *, + struct ethtool_coalesce *); + int qlcnic_82xx_set_rx_coalesce(struct qlcnic_adapter *); +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +@@ -268,13 +268,12 @@ static void qlcnic_add_lb_filter(struct + } + + void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, +- u16 vlan_id) ++ u16 vlan_id, struct qlcnic_host_tx_ring *tx_ring) + { + struct cmd_desc_type0 *hwdesc; + struct qlcnic_nic_req *req; + struct qlcnic_mac_req *mac_req; + struct qlcnic_vlan_req *vlan_req; +- struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring; + u32 producer; + u64 word; + +@@ -301,7 +300,8 @@ void qlcnic_82xx_change_filter(struct ql + + static void qlcnic_send_filter(struct qlcnic_adapter *adapter, + struct 
cmd_desc_type0 *first_desc, +- struct sk_buff *skb) ++ struct sk_buff *skb, ++ struct qlcnic_host_tx_ring *tx_ring) + { + struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data); + struct ethhdr *phdr = (struct ethhdr *)(skb->data); +@@ -335,7 +335,7 @@ static void qlcnic_send_filter(struct ql + tmp_fil->vlan_id == vlan_id) { + if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) + qlcnic_change_filter(adapter, &src_addr, +- vlan_id); ++ vlan_id, tx_ring); + tmp_fil->ftime = jiffies; + return; + } +@@ -350,7 +350,7 @@ static void qlcnic_send_filter(struct ql + if (!fil) + return; + +- qlcnic_change_filter(adapter, &src_addr, vlan_id); ++ qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring); + fil->ftime = jiffies; + fil->vlan_id = vlan_id; + memcpy(fil->faddr, &src_addr, ETH_ALEN); +@@ -766,7 +766,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_ + } + + if (adapter->drv_mac_learn) +- qlcnic_send_filter(adapter, first_desc, skb); ++ qlcnic_send_filter(adapter, first_desc, skb, tx_ring); + + tx_ring->tx_stats.tx_bytes += skb->len; + tx_ring->tx_stats.xmit_called++; diff --git a/queue-4.9/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch b/queue-4.9/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch new file mode 100644 index 00000000000..b58423f0e4c --- /dev/null +++ b/queue-4.9/qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch @@ -0,0 +1,30 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Giacinto Cifelli +Date: Wed, 10 Oct 2018 20:05:53 +0200 +Subject: qmi_wwan: Added support for Gemalto's Cinterion ALASxx WWAN interface + +From: Giacinto Cifelli + +[ Upstream commit 4f7617705bfff84d756fe4401a1f4f032f374984 ] + +Added support for Gemalto's Cinterion ALASxx WWAN interfaces +by adding QMI_FIXED_INTF with Cinterion's VID and PID. + +Signed-off-by: Giacinto Cifelli +Acked-by: Bjørn Mork +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -934,6 +934,7 @@ static const struct usb_device_id produc + {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ + {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ + {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ ++ {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */ + {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ + {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ + {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ diff --git a/queue-4.9/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch b/queue-4.9/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch new file mode 100644 index 00000000000..dc9865f8006 --- /dev/null +++ b/queue-4.9/rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch @@ -0,0 +1,54 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Eric Dumazet +Date: Tue, 2 Oct 2018 15:47:35 -0700 +Subject: rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096 + +From: Eric Dumazet + +[ Upstream commit 0e1d6eca5113858ed2caea61a5adc03c595f6096 ] + +We have an impressive number of syzkaller bugs that are linked +to the fact that syzbot was able to create a networking device +with millions of TX (or RX) queues. + +Let's limit the number of RX/TX queues to 4096, this really should +cover all known cases. + +A separate patch will add various cond_resched() in the loops +handling sysfs entries at device creation and dismantle. 
+ +Tested: + +lpaa6:~# ip link add gre-4097 numtxqueues 4097 numrxqueues 4097 type ip6gretap +RTNETLINK answers: Invalid argument + +lpaa6:~# time ip link add gre-4096 numtxqueues 4096 numrxqueues 4096 type ip6gretap + +real 0m0.180s +user 0m0.000s +sys 0m0.107s + +Fixes: 76ff5cc91935 ("rtnl: allow to specify number of rx and tx queues on device creation") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -2368,6 +2368,12 @@ struct net_device *rtnl_create_link(stru + else if (ops->get_num_rx_queues) + num_rx_queues = ops->get_num_rx_queues(); + ++ if (num_tx_queues < 1 || num_tx_queues > 4096) ++ return ERR_PTR(-EINVAL); ++ ++ if (num_rx_queues < 1 || num_rx_queues > 4096) ++ return ERR_PTR(-EINVAL); ++ + err = -ENOMEM; + dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, + ops->setup, num_tx_queues, num_rx_queues); diff --git a/queue-4.9/series b/queue-4.9/series index bc42a52a6dd..cce739a4a44 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -19,3 +19,24 @@ perf-script-python-fix-export-to-postgresql.py-occasional-failure.patch mm-preserve-_page_devmap-across-mprotect-calls.patch i2c-i2c-scmi-fix-for-i2c_smbus_write_block_data.patch xhci-don-t-print-a-warning-when-setting-link-state-for-disabled-ports.patch +bnxt_en-fix-tx-timeout-during-netpoll.patch +bonding-avoid-possible-dead-lock.patch +ip6_tunnel-be-careful-when-accessing-the-inner-header.patch +ip_tunnel-be-careful-when-accessing-the-inner-header.patch +ipv4-fix-use-after-free-in-ip_cmsg_recv_dstaddr.patch +ipv6-take-rcu-lock-in-rawv6_send_hdrinc.patch +net-dsa-bcm_sf2-call-setup-during-switch-resume.patch +net-hns-fix-for-unmapping-problem-when-smmu-is-on.patch +net-ipv4-update-fnhe_pmtu-when-first-hop-s-mtu-changes.patch 
+net-ipv6-display-all-addresses-in-output-of-proc-net-if_inet6.patch +netlabel-check-for-ipv4mask-in-addrinfo_get.patch +net-usb-cancel-pending-work-when-unbinding-smsc75xx.patch +qlcnic-fix-tx-descriptor-corruption-on-82xx-devices.patch +qmi_wwan-added-support-for-gemalto-s-cinterion-alasxx-wwan-interface.patch +team-forbid-enslaving-team-device-to-itself.patch +net-dsa-bcm_sf2-fix-unbind-ordering.patch +net-mvpp2-extract-the-correct-ethtype-from-the-skb-for-tx-csum-offload.patch +net-systemport-fix-wake-up-interrupt-race-during-resume.patch +rtnl-limit-ifla_num_tx_queues-and-ifla_num_rx_queues-to-4096.patch +tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch +inet-make-sure-to-grab-rcu_read_lock-before-using-ireq-ireq_opt.patch diff --git a/queue-4.9/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch b/queue-4.9/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch new file mode 100644 index 00000000000..2c4bcc391bf --- /dev/null +++ b/queue-4.9/tcp-dccp-fix-lockdep-issue-when-syn-is-backlogged.patch @@ -0,0 +1,85 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Eric Dumazet +Date: Mon, 1 Oct 2018 15:02:26 -0700 +Subject: tcp/dccp: fix lockdep issue when SYN is backlogged + +From: Eric Dumazet + +[ Upstream commit 1ad98e9d1bdf4724c0a8532fabd84bf3c457c2bc ] + +In normal SYN processing, packets are handled without listener +lock and in RCU protected ingress path. + +But syzkaller is known to be able to trick us and SYN +packets might be processed in process context, after being +queued into socket backlog. + +In commit 06f877d613be ("tcp/dccp: fix other lockdep splats +accessing ireq_opt") I made a very stupid fix, that happened +to work mostly because of the regular path being RCU protected. + +Really the thing protecting ireq->ireq_opt is RCU read lock, +and the pseudo request refcnt is not relevant. 
+ +This patch extends what I did in commit 449809a66c1d ("tcp/dccp: +block BH for SYN processing") by adding an extra rcu_read_{lock|unlock} +pair in the paths that might be taken when processing SYN from +socket backlog (thus possibly in process context) + +Fixes: 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 3 +-- + net/dccp/input.c | 4 +++- + net/ipv4/tcp_input.c | 4 +++- + 3 files changed, 7 insertions(+), 4 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -134,8 +134,7 @@ static inline int inet_request_bound_dev + + static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) + { +- return rcu_dereference_check(ireq->ireq_opt, +- atomic_read(&ireq->req.rsk_refcnt) > 0); ++ return rcu_dereference(ireq->ireq_opt); + } + + struct inet_cork { +--- a/net/dccp/input.c ++++ b/net/dccp/input.c +@@ -605,11 +605,13 @@ int dccp_rcv_state_process(struct sock * + if (sk->sk_state == DCCP_LISTEN) { + if (dh->dccph_type == DCCP_PKT_REQUEST) { + /* It is possible that we process SYN packets from backlog, +- * so we need to make sure to disable BH right there. ++ * so we need to make sure to disable BH and RCU right there. + */ ++ rcu_read_lock(); + local_bh_disable(); + acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); ++ rcu_read_unlock(); + if (!acceptable) + return 1; + consume_skb(skb); +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5978,11 +5978,13 @@ int tcp_rcv_state_process(struct sock *s + if (th->fin) + goto discard; + /* It is possible that we process SYN packets from backlog, +- * so we need to make sure to disable BH right there. ++ * so we need to make sure to disable BH and RCU right there. 
+ */ ++ rcu_read_lock(); + local_bh_disable(); + acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); ++ rcu_read_unlock(); + + if (!acceptable) + return 1; diff --git a/queue-4.9/team-forbid-enslaving-team-device-to-itself.patch b/queue-4.9/team-forbid-enslaving-team-device-to-itself.patch new file mode 100644 index 00000000000..3b26c88c1ef --- /dev/null +++ b/queue-4.9/team-forbid-enslaving-team-device-to-itself.patch @@ -0,0 +1,124 @@ +From foo@baz Tue Oct 16 16:15:55 CEST 2018 +From: Ido Schimmel +Date: Mon, 1 Oct 2018 12:21:59 +0300 +Subject: team: Forbid enslaving team device to itself + +From: Ido Schimmel + +[ Upstream commit 471b83bd8bbe4e89743683ef8ecb78f7029d8288 ] + +team's ndo_add_slave() acquires 'team->lock' and later tries to open the +newly enslaved device via dev_open(). This emits a 'NETDEV_UP' event +that causes the VLAN driver to add VLAN 0 on the team device. team's +ndo_vlan_rx_add_vid() will also try to acquire 'team->lock' and +deadlock. + +Fix this by checking early at the enslavement function that a team +device is not being enslaved to itself. + +A similar check was added to the bond driver in commit 09a89c219baf +("bonding: disallow enslaving a bond to itself"). 
+ +WARNING: possible recursive locking detected +4.18.0-rc7+ #176 Not tainted +-------------------------------------------- +syz-executor4/6391 is trying to acquire lock: +(____ptrval____) (&team->lock){+.+.}, at: team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 + +but task is already holding lock: +(____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 + +other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(&team->lock); + lock(&team->lock); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + +2 locks held by syz-executor4/6391: + #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] + #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4662 + #1: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 + +stack backtrace: +CPU: 1 PID: 6391 Comm: syz-executor4 Not tainted 4.18.0-rc7+ #176 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 + print_deadlock_bug kernel/locking/lockdep.c:1765 [inline] + check_deadlock kernel/locking/lockdep.c:1809 [inline] + validate_chain kernel/locking/lockdep.c:2405 [inline] + __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435 + lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924 + __mutex_lock_common kernel/locking/mutex.c:757 [inline] + __mutex_lock+0x176/0x1820 kernel/locking/mutex.c:894 + mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:909 + team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 + vlan_add_rx_filter_info+0x14a/0x1d0 net/8021q/vlan_core.c:210 + __vlan_vid_add net/8021q/vlan_core.c:278 [inline] + vlan_vid_add+0x63e/0x9d0 net/8021q/vlan_core.c:308 + vlan_device_event.cold.12+0x2a/0x2f net/8021q/vlan.c:381 + 
notifier_call_chain+0x180/0x390 kernel/notifier.c:93 + __raw_notifier_call_chain kernel/notifier.c:394 [inline] + raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 + call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735 + call_netdevice_notifiers net/core/dev.c:1753 [inline] + dev_open+0x173/0x1b0 net/core/dev.c:1433 + team_port_add drivers/net/team/team.c:1219 [inline] + team_add_slave+0xa8b/0x1c30 drivers/net/team/team.c:1948 + do_set_master+0x1c9/0x220 net/core/rtnetlink.c:2248 + do_setlink+0xba4/0x3e10 net/core/rtnetlink.c:2382 + rtnl_setlink+0x2a9/0x400 net/core/rtnetlink.c:2636 + rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4665 + netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2455 + rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4683 + netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] + netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 + netlink_sendmsg+0xa18/0xfd0 net/netlink/af_netlink.c:1908 + sock_sendmsg_nosec net/socket.c:642 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:652 + ___sys_sendmsg+0x7fd/0x930 net/socket.c:2126 + __sys_sendmsg+0x11d/0x290 net/socket.c:2164 + __do_sys_sendmsg net/socket.c:2173 [inline] + __se_sys_sendmsg net/socket.c:2171 [inline] + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2171 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x456b29 +Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f9706bf8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f9706bf96d4 RCX: 0000000000456b29 +RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000004 +RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 00000000004d3548 R14: 00000000004c8227 R15: 
0000000000000000 + +Fixes: 87002b03baab ("net: introduce vlan_vid_[add/del] and use them instead of direct [add/kill]_vid ndo calls") +Signed-off-by: Ido Schimmel +Reported-and-tested-by: syzbot+bd051aba086537515cdb@syzkaller.appspotmail.com +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1171,6 +1171,11 @@ static int team_port_add(struct team *te + return -EBUSY; + } + ++ if (dev == port_dev) { ++ netdev_err(dev, "Cannot enslave team device to itself\n"); ++ return -EINVAL; ++ } ++ + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && + vlan_uses_dev(dev)) { + netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",