From e7b687fde72f1391278ac5dbddf23c630ba4d0b9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 18 Aug 2018 11:48:21 +0200 Subject: [PATCH] 4.14-stable patches added patches: dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch llc-use-refcount_inc_not_zero-for-llc_sap_find.patch net-aquantia-fix-iff_allmulti-flag-functionality.patch net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch vhost-reset-metadata-cache-when-initializing-new-iotlb.patch vsock-split-dwork-to-avoid-reinitializations.patch --- ...ith-cwnd-shift-in-ccid2_cwnd_restart.patch | 75 ++++++++++ ...r-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch | 43 ++++++ ...eck-to-avoid-race-on-sk-sk_dst_cache.patch | 95 +++++++++++++ ...fcount_inc_not_zero-for-llc_sap_find.patch | 54 +++++++ ...-fix-iff_allmulti-flag-functionality.patch | 36 +++++ ...info-when-create-new-tc_index-filter.patch | 84 +++++++++++ ...reference-when-delete-tcindex-filter.patch | 120 ++++++++++++++++ queue-4.14/series | 9 ++ ...ta-cache-when-initializing-new-iotlb.patch | 40 ++++++ ...lit-dwork-to-avoid-reinitializations.patch | 134 ++++++++++++++++++ 10 files changed, 690 insertions(+) create mode 100644 queue-4.14/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch create mode 100644 queue-4.14/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch create mode 100644 queue-4.14/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch create mode 100644 queue-4.14/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch create mode 100644 queue-4.14/net-aquantia-fix-iff_allmulti-flag-functionality.patch create mode 100644 queue-4.14/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch create mode 100644 
queue-4.14/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch create mode 100644 queue-4.14/series create mode 100644 queue-4.14/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch create mode 100644 queue-4.14/vsock-split-dwork-to-avoid-reinitializations.patch diff --git a/queue-4.14/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch b/queue-4.14/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch new file mode 100644 index 00000000000..49a5765d181 --- /dev/null +++ b/queue-4.14/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch @@ -0,0 +1,75 @@ +From foo@baz Sat Aug 18 11:45:17 CEST 2018 +From: Alexey Kodanev +Date: Tue, 7 Aug 2018 20:03:57 +0300 +Subject: dccp: fix undefined behavior with 'cwnd' shift in ccid2_cwnd_restart() + +From: Alexey Kodanev + +[ Upstream commit 61ef4b07fcdc30535889990cf4229766502561cf ] + +The shift of 'cwnd' with '(now - hc->tx_lsndtime) / hc->tx_rto' value +can lead to undefined behavior [1]. + +In order to fix this use a gradual shift of the window with a 'while' +loop, similar to what tcp_cwnd_restart() is doing. + +When comparing delta and RTO there is a minor difference between TCP +and DCCP, the last one also invokes dccp_cwnd_restart() and reduces +'cwnd' if delta equals RTO. That case is preserved in this change. + +[1]: +[40850.963623] UBSAN: Undefined behaviour in net/dccp/ccids/ccid2.c:237:7 +[40851.043858] shift exponent 67 is too large for 32-bit type 'unsigned int' +[40851.127163] CPU: 3 PID: 15940 Comm: netstress Tainted: G W E 4.18.0-rc7.x86_64 #1 +... +[40851.377176] Call Trace: +[40851.408503] dump_stack+0xf1/0x17b +[40851.451331] ? show_regs_print_info+0x5/0x5 +[40851.503555] ubsan_epilogue+0x9/0x7c +[40851.548363] __ubsan_handle_shift_out_of_bounds+0x25b/0x2b4 +[40851.617109] ? __ubsan_handle_load_invalid_value+0x18f/0x18f +[40851.686796] ? xfrm4_output_finish+0x80/0x80 +[40851.739827] ? 
lock_downgrade+0x6d0/0x6d0 +[40851.789744] ? xfrm4_prepare_output+0x160/0x160 +[40851.845912] ? ip_queue_xmit+0x810/0x1db0 +[40851.895845] ? ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp] +[40851.963530] ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp] +[40852.029063] dccp_xmit_packet+0x1d3/0x720 [dccp] +[40852.086254] dccp_write_xmit+0x116/0x1d0 [dccp] +[40852.142412] dccp_sendmsg+0x428/0xb20 [dccp] +[40852.195454] ? inet_dccp_listen+0x200/0x200 [dccp] +[40852.254833] ? sched_clock+0x5/0x10 +[40852.298508] ? sched_clock+0x5/0x10 +[40852.342194] ? inet_create+0xdf0/0xdf0 +[40852.388988] sock_sendmsg+0xd9/0x160 +... + +Fixes: 113ced1f52e5 ("dccp ccid-2: Perform congestion-window validation") +Signed-off-by: Alexey Kodanev +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ccids/ccid2.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/dccp/ccids/ccid2.c ++++ b/net/dccp/ccids/ccid2.c +@@ -228,14 +228,16 @@ static void ccid2_cwnd_restart(struct so + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + u32 cwnd = hc->tx_cwnd, restart_cwnd, + iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); ++ s32 delta = now - hc->tx_lsndtime; + + hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); + + /* don't reduce cwnd below the initial window (IW) */ + restart_cwnd = min(cwnd, iwnd); +- cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; +- hc->tx_cwnd = max(cwnd, restart_cwnd); + ++ while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd) ++ cwnd >>= 1; ++ hc->tx_cwnd = max(cwnd, restart_cwnd); + hc->tx_cwnd_stamp = now; + hc->tx_cwnd_used = 0; + diff --git a/queue-4.14/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch b/queue-4.14/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch new file mode 100644 index 00000000000..66805ccf3fa --- /dev/null +++ b/queue-4.14/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch @@ -0,0 +1,43 @@ 
+From foo@baz Sat Aug 18 11:45:17 CEST 2018 +From: Xin Long +Date: Sun, 5 Aug 2018 22:46:07 +0800 +Subject: ip6_tunnel: use the right value for ipv4 min mtu check in ip6_tnl_xmit + +From: Xin Long + +[ Upstream commit 82a40777de12728dedf4075453b694f0d1baee80 ] + +According to RFC791, 68 bytes is the minimum size of IPv4 datagram every +device must be able to forward without further fragmentation while 576 +bytes is the minimum size of IPv4 datagram every device has to be able +to receive, so in ip6_tnl_xmit(), 68(IPV4_MIN_MTU) should be the right +value for the ipv4 min mtu check in ip6_tnl_xmit. + +While at it, change to use max() instead of if statement. + +Fixes: c9fefa08190f ("ip6_tunnel: get the min mtu properly in ip6_tnl_xmit") +Reported-by: Sabrina Dubroca +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1134,12 +1134,8 @@ route_lookup: + max_headroom += 8; + mtu -= 8; + } +- if (skb->protocol == htons(ETH_P_IPV6)) { +- if (mtu < IPV6_MIN_MTU) +- mtu = IPV6_MIN_MTU; +- } else if (mtu < 576) { +- mtu = 576; +- } ++ mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? 
++ IPV6_MIN_MTU : IPV4_MIN_MTU); + + skb_dst_update_pmtu(skb, mtu); + if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { diff --git a/queue-4.14/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch b/queue-4.14/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch new file mode 100644 index 00000000000..99cb3088097 --- /dev/null +++ b/queue-4.14/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch @@ -0,0 +1,95 @@ +From foo@baz Sat Aug 18 11:45:17 CEST 2018 +From: Wei Wang +Date: Fri, 10 Aug 2018 11:14:56 -0700 +Subject: l2tp: use sk_dst_check() to avoid race on sk->sk_dst_cache + +From: Wei Wang + +[ Upstream commit 6d37fa49da1e8db8fb1995be22ac837ca41ac8a8 ] + +In l2tp code, if it is a L2TP_UDP_ENCAP tunnel, tunnel->sk points to a +UDP socket. User could call sendmsg() on both this tunnel and the UDP +socket itself concurrently. As l2tp_xmit_skb() holds socket lock and call +__sk_dst_check() to refresh sk->sk_dst_cache, while udpv6_sendmsg() is +lockless and call sk_dst_check() to refresh sk->sk_dst_cache, there +could be a race and cause the dst cache to be freed multiple times. +So we fix l2tp side code to always call sk_dst_check() to guarantee +xchg() is called when refreshing sk->sk_dst_cache to avoid race +conditions. 
+ +Syzkaller reported stack trace: +BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] +BUG: KASAN: use-after-free in atomic_fetch_add_unless include/linux/atomic.h:575 [inline] +BUG: KASAN: use-after-free in atomic_add_unless include/linux/atomic.h:597 [inline] +BUG: KASAN: use-after-free in dst_hold_safe include/net/dst.h:308 [inline] +BUG: KASAN: use-after-free in ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029 +Read of size 4 at addr ffff8801aea9a880 by task syz-executor129/4829 + +CPU: 0 PID: 4829 Comm: syz-executor129 Not tainted 4.18.0-rc7-next-20180802+ #30 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 + print_address_description+0x6c/0x20b mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.7+0x242/0x30d mm/kasan/report.c:412 + check_memory_region_inline mm/kasan/kasan.c:260 [inline] + check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 + kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272 + atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] + atomic_fetch_add_unless include/linux/atomic.h:575 [inline] + atomic_add_unless include/linux/atomic.h:597 [inline] + dst_hold_safe include/net/dst.h:308 [inline] + ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029 + rt6_get_pcpu_route net/ipv6/route.c:1249 [inline] + ip6_pol_route+0x354/0xd20 net/ipv6/route.c:1922 + ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2098 + fib6_rule_lookup+0x283/0x890 net/ipv6/fib6_rules.c:122 + ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2126 + ip6_dst_lookup_tail+0x1278/0x1da0 net/ipv6/ip6_output.c:978 + ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079 + ip6_sk_dst_lookup_flow+0x5ed/0xc50 net/ipv6/ip6_output.c:1117 + udpv6_sendmsg+0x2163/0x36b0 net/ipv6/udp.c:1354 + inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 + 
sock_sendmsg_nosec net/socket.c:622 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:632 + ___sys_sendmsg+0x51d/0x930 net/socket.c:2115 + __sys_sendmmsg+0x240/0x6f0 net/socket.c:2210 + __do_sys_sendmmsg net/socket.c:2239 [inline] + __se_sys_sendmmsg net/socket.c:2236 [inline] + __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2236 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x446a29 +Code: e8 ac b8 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f4de5532db8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 +RAX: ffffffffffffffda RBX: 00000000006dcc38 RCX: 0000000000446a29 +RDX: 00000000000000b8 RSI: 0000000020001b00 RDI: 0000000000000003 +RBP: 00000000006dcc30 R08: 00007f4de5533700 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dcc3c +R13: 00007ffe2b830fdf R14: 00007f4de55339c0 R15: 0000000000000001 + +Fixes: 71b1391a4128 ("l2tp: ensure sk->dst is still valid") +Reported-by: syzbot+05f840f3b04f211bad55@syzkaller.appspotmail.com +Signed-off-by: Wei Wang +Signed-off-by: Martin KaFai Lau +Cc: Guillaume Nault +Cc: David Ahern +Cc: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -1211,7 +1211,7 @@ int l2tp_xmit_skb(struct l2tp_session *s + + /* Get routing info from the tunnel socket */ + skb_dst_drop(skb); +- skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); ++ skb_dst_set(skb, sk_dst_check(sk, 0)); + + inet = inet_sk(sk); + fl = &inet->cork.fl; diff --git a/queue-4.14/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch b/queue-4.14/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch new file mode 100644 index 00000000000..f2f908a1d68 --- /dev/null +++ b/queue-4.14/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch @@ -0,0 +1,54 @@ +From foo@baz Sat Aug 18 11:45:17 CEST 2018 +From: Cong Wang +Date: Tue, 7 Aug 2018 12:41:38 -0700 +Subject: llc: use refcount_inc_not_zero() for llc_sap_find() + +From: Cong Wang + +[ Upstream commit 0dcb82254d65f72333aa50ad626d1e9665ad093b ] + +llc_sap_put() decreases the refcnt before deleting sap +from the global list. Therefore, there is a chance +llc_sap_find() could find a sap with zero refcnt +in this global list. + +Close this race condition by checking if refcnt is zero +or not in llc_sap_find(), if it is zero then it is being +removed so we can just treat it as gone. + +Reported-by: +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/llc.h | 5 +++++ + net/llc/llc_core.c | 4 ++-- + 2 files changed, 7 insertions(+), 2 deletions(-) + +--- a/include/net/llc.h ++++ b/include/net/llc.h +@@ -116,6 +116,11 @@ static inline void llc_sap_hold(struct l + refcount_inc(&sap->refcnt); + } + ++static inline bool llc_sap_hold_safe(struct llc_sap *sap) ++{ ++ return refcount_inc_not_zero(&sap->refcnt); ++} ++ + void llc_sap_close(struct llc_sap *sap); + + static inline void llc_sap_put(struct llc_sap *sap) +--- a/net/llc/llc_core.c ++++ b/net/llc/llc_core.c +@@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned ch + + rcu_read_lock_bh(); + sap = __llc_sap_find(sap_value); +- if (sap) +- llc_sap_hold(sap); ++ if (!sap || !llc_sap_hold_safe(sap)) ++ sap = NULL; + rcu_read_unlock_bh(); + return sap; + } diff --git a/queue-4.14/net-aquantia-fix-iff_allmulti-flag-functionality.patch b/queue-4.14/net-aquantia-fix-iff_allmulti-flag-functionality.patch new file mode 100644 index 00000000000..c9e368ca0b4 --- /dev/null +++ b/queue-4.14/net-aquantia-fix-iff_allmulti-flag-functionality.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Aug 18 11:45:17 CEST 2018 +From: Dmitry Bogdanov +Date: Wed, 8 Aug 2018 14:06:32 +0300 +Subject: net: aquantia: Fix IFF_ALLMULTI flag functionality + +From: Dmitry Bogdanov + +[ Upstream commit 11ba961c916127651e12af6cad3891f8aeb25aa9 ] + +It was noticed that NIC always pass all multicast traffic to the host +regardless of IFF_ALLMULTI flag on the interface. +The rule in MC Filter Table in NIC, that is configured to accept any +multicast packets, is turning on if IFF_MULTICAST flag is set on the +interface. It leads to passing all multicast traffic to the host. +This fix changes the condition to turn on that rule by checking +IFF_ALLMULTI flag as it should. + +Fixes: b21f502f84be ("net:ethernet:aquantia: Fix for multicast filter handling.") +Signed-off-by: Dmitry Bogdanov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c ++++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +@@ -752,7 +752,7 @@ static int hw_atl_b0_hw_packet_filter_se + + rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC)); + rpfl2multicast_flr_en_set(self, +- IS_FILTER_ENABLED(IFF_MULTICAST), 0); ++ IS_FILTER_ENABLED(IFF_ALLMULTI), 0); + + rpfl2_accept_all_mc_packets_set(self, + IS_FILTER_ENABLED(IFF_ALLMULTI)); diff --git a/queue-4.14/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch b/queue-4.14/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch new file mode 100644 index 00000000000..a65ca162c11 --- /dev/null +++ b/queue-4.14/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch @@ -0,0 +1,84 @@ +From foo@baz Sat Aug 18 11:45:17 CEST 2018 +From: Hangbin Liu +Date: Mon, 13 Aug 2018 18:44:04 +0800 +Subject: net_sched: Fix missing res info when create new tc_index filter + +From: Hangbin Liu + +[ Upstream commit 008369dcc5f7bfba526c98054f8525322acf0ea3 ] + +Li Shuang reported the following warn: + +[ 733.484610] WARNING: CPU: 6 PID: 21123 at net/sched/sch_cbq.c:1418 cbq_destroy_class+0x5d/0x70 [sch_cbq] +[ 733.495190] Modules linked in: sch_cbq cls_tcindex sch_dsmark rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat l +[ 733.574155] syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm igb ixgbe ahci libahci i2c_algo_bit libata i40e i2c_core dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod +[ 733.592500] CPU: 6 PID: 21123 Comm: tc Not tainted 4.18.0-rc8.latest+ #131 +[ 733.600169] Hardware name: Dell Inc. 
PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016 +[ 733.608518] RIP: 0010:cbq_destroy_class+0x5d/0x70 [sch_cbq] +[ 733.614734] Code: e7 d9 d2 48 8b 7b 48 e8 61 05 da d2 48 8d bb f8 00 00 00 e8 75 ae d5 d2 48 39 eb 74 0a 48 89 df 5b 5d e9 16 6c 94 d2 5b 5d c3 <0f> 0b eb b6 0f 1f 44 00 00 66 2e 0f 1f 84 +[ 733.635798] RSP: 0018:ffffbfbb066bb9d8 EFLAGS: 00010202 +[ 733.641627] RAX: 0000000000000001 RBX: ffff9cdd17392800 RCX: 000000008010000f +[ 733.649588] RDX: ffff9cdd1df547e0 RSI: ffff9cdd17392800 RDI: ffff9cdd0f84c800 +[ 733.657547] RBP: ffff9cdd0f84c800 R08: 0000000000000001 R09: 0000000000000000 +[ 733.665508] R10: ffff9cdd0f84d000 R11: 0000000000000001 R12: 0000000000000001 +[ 733.673469] R13: 0000000000000000 R14: 0000000000000001 R15: ffff9cdd17392200 +[ 733.681430] FS: 00007f911890a740(0000) GS:ffff9cdd1f8c0000(0000) knlGS:0000000000000000 +[ 733.690456] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 733.696864] CR2: 0000000000b5544c CR3: 0000000859374002 CR4: 00000000001606e0 +[ 733.704826] Call Trace: +[ 733.707554] cbq_destroy+0xa1/0xd0 [sch_cbq] +[ 733.712318] qdisc_destroy+0x62/0x130 +[ 733.716401] dsmark_destroy+0x2a/0x70 [sch_dsmark] +[ 733.721745] qdisc_destroy+0x62/0x130 +[ 733.725829] qdisc_graft+0x3ba/0x470 +[ 733.729817] tc_get_qdisc+0x2a6/0x2c0 +[ 733.733901] ? cred_has_capability+0x7d/0x130 +[ 733.738761] rtnetlink_rcv_msg+0x263/0x2d0 +[ 733.743330] ? rtnl_calcit.isra.30+0x110/0x110 +[ 733.748287] netlink_rcv_skb+0x4d/0x130 +[ 733.752576] netlink_unicast+0x1a3/0x250 +[ 733.756949] netlink_sendmsg+0x2ae/0x3a0 +[ 733.761324] sock_sendmsg+0x36/0x40 +[ 733.765213] ___sys_sendmsg+0x26f/0x2d0 +[ 733.769493] ? handle_pte_fault+0x586/0xdf0 +[ 733.774158] ? __handle_mm_fault+0x389/0x500 +[ 733.778919] ? 
__sys_sendmsg+0x5e/0xa0 +[ 733.783099] __sys_sendmsg+0x5e/0xa0 +[ 733.787087] do_syscall_64+0x5b/0x180 +[ 733.791171] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 733.796805] RIP: 0033:0x7f9117f23f10 +[ 733.800791] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 +[ 733.821873] RSP: 002b:00007ffe96818398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +[ 733.830319] RAX: ffffffffffffffda RBX: 000000005b71244c RCX: 00007f9117f23f10 +[ 733.838280] RDX: 0000000000000000 RSI: 00007ffe968183e0 RDI: 0000000000000003 +[ 733.846241] RBP: 00007ffe968183e0 R08: 000000000000ffff R09: 0000000000000003 +[ 733.854202] R10: 00007ffe96817e20 R11: 0000000000000246 R12: 0000000000000000 +[ 733.862161] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000 +[ 733.870121] ---[ end trace 28edd4aad712ddca ]--- + +This is because we didn't update f->result.res when create new filter. Then in +tcindex_delete() -> tcf_unbind_filter(), we will failed to find out the res +and unbind filter, which will trigger the WARN_ON() in cbq_destroy_class(). + +Fix it by updating f->result.res when create new filter. + +Fixes: 6e0565697a106 ("net_sched: fix another crash in cls_tcindex") +Reported-by: Li Shuang +Signed-off-by: Hangbin Liu +Acked-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_tcindex.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -482,6 +482,7 @@ tcindex_set_parms(struct net *net, struc + struct tcindex_filter *nfp; + struct tcindex_filter __rcu **fp; + ++ f->result.res = r->res; + tcf_exts_change(&f->result.exts, &r->exts); + + fp = cp->h + (handle % cp->hash); diff --git a/queue-4.14/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch b/queue-4.14/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch new file mode 100644 index 00000000000..fc9785b6be8 --- /dev/null +++ b/queue-4.14/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch @@ -0,0 +1,120 @@ +From foo@baz Sat Aug 18 11:45:17 CEST 2018 +From: Hangbin Liu +Date: Mon, 13 Aug 2018 18:44:03 +0800 +Subject: net_sched: fix NULL pointer dereference when delete tcindex filter + +From: Hangbin Liu + +[ Upstream commit 2df8bee5654bb2b7312662ca6810d4dc16b0b67f ] + +Li Shuang reported the following crash: + +[ 71.267724] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 +[ 71.276456] PGD 800000085d9bd067 P4D 800000085d9bd067 PUD 859a0b067 PMD 0 +[ 71.284127] Oops: 0000 [#1] SMP PTI +[ 71.288015] CPU: 12 PID: 2386 Comm: tc Not tainted 4.18.0-rc8.latest+ #131 +[ 71.295686] Hardware name: Dell Inc. 
PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016 +[ 71.304037] RIP: 0010:tcindex_delete+0x72/0x280 [cls_tcindex] +[ 71.310446] Code: 00 31 f6 48 87 75 20 48 85 f6 74 11 48 8b 47 18 48 8b 40 08 48 8b 40 50 e8 fb a6 f8 fc 48 85 db 0f 84 dc 00 00 00 48 8b 73 18 <8b> 56 04 48 8d 7e 04 85 d2 0f 84 7b 01 00 +[ 71.331517] RSP: 0018:ffffb45207b3f898 EFLAGS: 00010282 +[ 71.337345] RAX: ffff8ad3d72d6360 RBX: ffff8acc84393680 RCX: 000000000000002e +[ 71.345306] RDX: ffff8ad3d72c8570 RSI: 0000000000000000 RDI: ffff8ad847a45800 +[ 71.353277] RBP: ffff8acc84393688 R08: ffff8ad3d72c8400 R09: 0000000000000000 +[ 71.361238] R10: ffff8ad3de786e00 R11: 0000000000000000 R12: ffffb45207b3f8c7 +[ 71.369199] R13: ffff8ad3d93bd2a0 R14: 000000000000002e R15: ffff8ad3d72c9600 +[ 71.377161] FS: 00007f9d3ec3e740(0000) GS:ffff8ad3df980000(0000) knlGS:0000000000000000 +[ 71.386188] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 71.392597] CR2: 0000000000000004 CR3: 0000000852f06003 CR4: 00000000001606e0 +[ 71.400558] Call Trace: +[ 71.403299] tcindex_destroy_element+0x25/0x40 [cls_tcindex] +[ 71.409611] tcindex_walk+0xbb/0x110 [cls_tcindex] +[ 71.414953] tcindex_destroy+0x44/0x90 [cls_tcindex] +[ 71.420492] ? tcindex_delete+0x280/0x280 [cls_tcindex] +[ 71.426323] tcf_proto_destroy+0x16/0x40 +[ 71.430696] tcf_chain_flush+0x51/0x70 +[ 71.434876] tcf_block_put_ext.part.30+0x8f/0x1b0 +[ 71.440122] tcf_block_put+0x4d/0x70 +[ 71.444108] cbq_destroy+0x4d/0xd0 [sch_cbq] +[ 71.448869] qdisc_destroy+0x62/0x130 +[ 71.452951] dsmark_destroy+0x2a/0x70 [sch_dsmark] +[ 71.458300] qdisc_destroy+0x62/0x130 +[ 71.462373] qdisc_graft+0x3ba/0x470 +[ 71.466359] tc_get_qdisc+0x2a6/0x2c0 +[ 71.470443] ? cred_has_capability+0x7d/0x130 +[ 71.475307] rtnetlink_rcv_msg+0x263/0x2d0 +[ 71.479875] ? 
rtnl_calcit.isra.30+0x110/0x110 +[ 71.484832] netlink_rcv_skb+0x4d/0x130 +[ 71.489109] netlink_unicast+0x1a3/0x250 +[ 71.493482] netlink_sendmsg+0x2ae/0x3a0 +[ 71.497859] sock_sendmsg+0x36/0x40 +[ 71.501748] ___sys_sendmsg+0x26f/0x2d0 +[ 71.506029] ? handle_pte_fault+0x586/0xdf0 +[ 71.510694] ? __handle_mm_fault+0x389/0x500 +[ 71.515457] ? __sys_sendmsg+0x5e/0xa0 +[ 71.519636] __sys_sendmsg+0x5e/0xa0 +[ 71.523626] do_syscall_64+0x5b/0x180 +[ 71.527711] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 71.533345] RIP: 0033:0x7f9d3e257f10 +[ 71.537331] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 +[ 71.558401] RSP: 002b:00007fff6f893398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +[ 71.566848] RAX: ffffffffffffffda RBX: 000000005b71274d RCX: 00007f9d3e257f10 +[ 71.574810] RDX: 0000000000000000 RSI: 00007fff6f8933e0 RDI: 0000000000000003 +[ 71.582770] RBP: 00007fff6f8933e0 R08: 000000000000ffff R09: 0000000000000003 +[ 71.590729] R10: 00007fff6f892e20 R11: 0000000000000246 R12: 0000000000000000 +[ 71.598689] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000 +[ 71.606651] Modules linked in: sch_cbq cls_tcindex sch_dsmark xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_coni +[ 71.685425] libahci i2c_algo_bit i2c_core i40e libata dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod +[ 71.697075] CR2: 0000000000000004 +[ 71.700792] ---[ end trace f604eb1acacd978b ]--- + +Reproducer: +tc qdisc add dev lo handle 1:0 root dsmark indices 64 set_tc_index +tc filter add dev lo parent 1:0 protocol ip prio 1 tcindex mask 0xfc shift 2 +tc qdisc add dev lo parent 1:0 handle 2:0 cbq bandwidth 10Mbit cell 8 avpkt 1000 mpu 64 +tc class add dev lo parent 2:0 classid 2:1 cbq bandwidth 10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated allot 1514 weight 1 maxburst 
10 +tc filter add dev lo parent 2:0 protocol ip prio 1 handle 0x2e tcindex classid 2:1 pass_on +tc qdisc add dev lo parent 2:1 pfifo limit 5 +tc qdisc del dev lo root + +This is because in tcindex_set_parms, when there is no old_r, we set new +exts to cr.exts. And we didn't set it to filter when r == &new_filter_result. + +Then in tcindex_delete() -> tcf_exts_get_net(), we will get NULL pointer +dereference as we didn't init exts. + +Fix it by moving tcf_exts_change() after "if (old_r && old_r != r)" check. +Then we don't need "cr" as there is no errout after that. + +Fixes: bf63ac73b3e13 ("net_sched: fix an oops in tcindex filter") +Reported-by: Li Shuang +Signed-off-by: Hangbin Liu +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_tcindex.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -464,11 +464,6 @@ tcindex_set_parms(struct net *net, struc + tcf_bind_filter(tp, &cr.res, base); + } + +- if (old_r) +- tcf_exts_change(&r->exts, &e); +- else +- tcf_exts_change(&cr.exts, &e); +- + if (old_r && old_r != r) { + err = tcindex_filter_result_init(old_r); + if (err < 0) { +@@ -479,6 +474,8 @@ tcindex_set_parms(struct net *net, struc + + oldp = p; + r->res = cr.res; ++ tcf_exts_change(&r->exts, &e); ++ + rcu_assign_pointer(tp->root, cp); + + if (r == &new_filter_result) { diff --git a/queue-4.14/series b/queue-4.14/series new file mode 100644 index 00000000000..b3b83020fbc --- /dev/null +++ b/queue-4.14/series @@ -0,0 +1,9 @@ +dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch +l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch +llc-use-refcount_inc_not_zero-for-llc_sap_find.patch +net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch +vsock-split-dwork-to-avoid-reinitializations.patch +net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch 
+vhost-reset-metadata-cache-when-initializing-new-iotlb.patch +ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch +net-aquantia-fix-iff_allmulti-flag-functionality.patch diff --git a/queue-4.14/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch b/queue-4.14/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch new file mode 100644 index 00000000000..e8aca567738 --- /dev/null +++ b/queue-4.14/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch @@ -0,0 +1,40 @@ +From foo@baz Sat Aug 18 11:45:17 CEST 2018 +From: Jason Wang +Date: Wed, 8 Aug 2018 11:43:04 +0800 +Subject: vhost: reset metadata cache when initializing new IOTLB + +From: Jason Wang + +[ Upstream commit b13f9c6364373a1b9f71e9846dc4fb199296f926 ] + +We need to reset metadata cache during new IOTLB initialization, +otherwise the stale pointers to previous IOTLB may still be accessed, +which will lead to a use after free. + +Reported-by: syzbot+c51e6736a1bf614b3272@syzkaller.appspotmail.com +Fixes: f88949138058 ("vhost: introduce O(1) vq metadata cache") +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/vhost.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -1578,9 +1578,12 @@ int vhost_init_device_iotlb(struct vhost + d->iotlb = niotlb; + + for (i = 0; i < d->nvqs; ++i) { +- mutex_lock(&d->vqs[i]->mutex); +- d->vqs[i]->iotlb = niotlb; +- mutex_unlock(&d->vqs[i]->mutex); ++ struct vhost_virtqueue *vq = d->vqs[i]; ++ ++ mutex_lock(&vq->mutex); ++ vq->iotlb = niotlb; ++ __vhost_vq_meta_reset(vq); ++ mutex_unlock(&vq->mutex); + } + + vhost_umem_clean(oiotlb); diff --git a/queue-4.14/vsock-split-dwork-to-avoid-reinitializations.patch b/queue-4.14/vsock-split-dwork-to-avoid-reinitializations.patch new file mode 100644 index 00000000000..dd1e5db587e --- /dev/null +++ b/queue-4.14/vsock-split-dwork-to-avoid-reinitializations.patch @@ -0,0 +1,134 @@ +From foo@baz Sat Aug 18 11:45:17 CEST 2018 +From: Cong Wang +Date: Mon, 6 Aug 2018 11:06:02 -0700 +Subject: vsock: split dwork to avoid reinitializations + +From: Cong Wang + +[ Upstream commit 455f05ecd2b219e9a216050796d30c830d9bc393 ] + +syzbot reported that we reinitialize an active delayed +work in vsock_stream_connect(): + + ODEBUG: init active (active state 0) object type: timer_list hint: + delayed_work_timer_fn+0x0/0x90 kernel/workqueue.c:1414 + WARNING: CPU: 1 PID: 11518 at lib/debugobjects.c:329 + debug_print_object+0x16a/0x210 lib/debugobjects.c:326 + +The pattern is apparently wrong, we should only initialize +the delayed work once and could repeatedly schedule it. So we +have to move out the initializations to allocation side. +And to avoid confusion, we can split the shared dwork +into two, instead of re-using the same one. + +Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") +Reported-by: +Cc: Andy king +Cc: Stefan Hajnoczi +Cc: Jorgen Hansen +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/af_vsock.h | 4 ++-- + net/vmw_vsock/af_vsock.c | 15 ++++++++------- + net/vmw_vsock/vmci_transport.c | 3 +-- + 3 files changed, 11 insertions(+), 11 deletions(-) + +--- a/include/net/af_vsock.h ++++ b/include/net/af_vsock.h +@@ -62,7 +62,8 @@ struct vsock_sock { + struct list_head pending_links; + struct list_head accept_queue; + bool rejected; +- struct delayed_work dwork; ++ struct delayed_work connect_work; ++ struct delayed_work pending_work; + struct delayed_work close_work; + bool close_work_scheduled; + u32 peer_shutdown; +@@ -75,7 +76,6 @@ struct vsock_sock { + + s64 vsock_stream_has_data(struct vsock_sock *vsk); + s64 vsock_stream_has_space(struct vsock_sock *vsk); +-void vsock_pending_work(struct work_struct *work); + struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -449,14 +449,14 @@ static int vsock_send_shutdown(struct so + return transport->shutdown(vsock_sk(sk), mode); + } + +-void vsock_pending_work(struct work_struct *work) ++static void vsock_pending_work(struct work_struct *work) + { + struct sock *sk; + struct sock *listener; + struct vsock_sock *vsk; + bool cleanup; + +- vsk = container_of(work, struct vsock_sock, dwork.work); ++ vsk = container_of(work, struct vsock_sock, pending_work.work); + sk = sk_vsock(vsk); + listener = vsk->listener; + cleanup = true; +@@ -496,7 +496,6 @@ out: + sock_put(sk); + sock_put(listener); + } +-EXPORT_SYMBOL_GPL(vsock_pending_work); + + /**** SOCKET OPERATIONS ****/ + +@@ -595,6 +594,8 @@ static int __vsock_bind(struct sock *sk, + return retval; + } + ++static void vsock_connect_timeout(struct work_struct *work); ++ + struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, +@@ -637,6 +638,8 @@ struct sock *__vsock_create(struct net * + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + 
vsk->peer_shutdown = 0; ++ INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout); ++ INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work); + + psk = parent ? vsock_sk(parent) : NULL; + if (parent) { +@@ -1116,7 +1119,7 @@ static void vsock_connect_timeout(struct + struct vsock_sock *vsk; + int cancel = 0; + +- vsk = container_of(work, struct vsock_sock, dwork.work); ++ vsk = container_of(work, struct vsock_sock, connect_work.work); + sk = sk_vsock(vsk); + + lock_sock(sk); +@@ -1220,9 +1223,7 @@ static int vsock_stream_connect(struct s + * timeout fires. + */ + sock_hold(sk); +- INIT_DELAYED_WORK(&vsk->dwork, +- vsock_connect_timeout); +- schedule_delayed_work(&vsk->dwork, timeout); ++ schedule_delayed_work(&vsk->connect_work, timeout); + + /* Skip ahead to preserve error code set above. */ + goto out_wait; +--- a/net/vmw_vsock/vmci_transport.c ++++ b/net/vmw_vsock/vmci_transport.c +@@ -1091,8 +1091,7 @@ static int vmci_transport_recv_listen(st + vpending->listener = sk; + sock_hold(sk); + sock_hold(pending); +- INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); +- schedule_delayed_work(&vpending->dwork, HZ); ++ schedule_delayed_work(&vpending->pending_work, HZ); + + out: + return err; -- 2.47.3