From e9f6cec612d6fc43da57a41b385b1b070c529dfd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 18 Aug 2018 11:43:44 +0200 Subject: [PATCH] 4.17-stable patches added patches: dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch llc-use-refcount_inc_not_zero-for-llc_sap_find.patch mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch net-aquantia-fix-iff_allmulti-flag-functionality.patch net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch r8169-don-t-use-msi-x-on-rtl8168g.patch rxrpc-fix-the-keepalive-generator.patch vhost-reset-metadata-cache-when-initializing-new-iotlb.patch vsock-split-dwork-to-avoid-reinitializations.patch --- ...ith-cwnd-shift-in-ccid2_cwnd_restart.patch | 75 +++ ...r-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch | 43 ++ ...eck-to-avoid-race-on-sk-sk_dst_cache.patch | 95 ++++ ...fcount_inc_not_zero-for-llc_sap_find.patch | 54 +++ ...remove-redundant-counter-destruction.patch | 36 ++ ...edundant-mirror-resource-destruction.patch | 46 ++ ...emove-redundant-resource-destruction.patch | 72 +++ ...return-error-for-conflicting-actions.patch | 163 +++++++ ...-fix-iff_allmulti-flag-functionality.patch | 36 ++ ...in-is-possible-between-two-functions.patch | 46 ++ ...info-when-create-new-tc_index-filter.patch | 84 ++++ ...reference-when-delete-tcindex-filter.patch | 120 +++++ .../r8169-don-t-use-msi-x-on-rtl8168g.patch | 42 ++ .../rxrpc-fix-the-keepalive-generator.patch | 454 ++++++++++++++++++ 
queue-4.17/series | 16 + ...ta-cache-when-initializing-new-iotlb.patch | 40 ++ ...lit-dwork-to-avoid-reinitializations.patch | 134 ++++++ 17 files changed, 1556 insertions(+) create mode 100644 queue-4.17/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch create mode 100644 queue-4.17/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch create mode 100644 queue-4.17/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch create mode 100644 queue-4.17/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch create mode 100644 queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch create mode 100644 queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch create mode 100644 queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch create mode 100644 queue-4.17/mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch create mode 100644 queue-4.17/net-aquantia-fix-iff_allmulti-flag-functionality.patch create mode 100644 queue-4.17/net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch create mode 100644 queue-4.17/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch create mode 100644 queue-4.17/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch create mode 100644 queue-4.17/r8169-don-t-use-msi-x-on-rtl8168g.patch create mode 100644 queue-4.17/rxrpc-fix-the-keepalive-generator.patch create mode 100644 queue-4.17/series create mode 100644 queue-4.17/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch create mode 100644 queue-4.17/vsock-split-dwork-to-avoid-reinitializations.patch diff --git a/queue-4.17/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch b/queue-4.17/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch new file mode 100644 index 00000000000..3bc55ee9019 --- /dev/null +++ 
b/queue-4.17/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch @@ -0,0 +1,75 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Alexey Kodanev +Date: Tue, 7 Aug 2018 20:03:57 +0300 +Subject: dccp: fix undefined behavior with 'cwnd' shift in ccid2_cwnd_restart() + +From: Alexey Kodanev + +[ Upstream commit 61ef4b07fcdc30535889990cf4229766502561cf ] + +The shift of 'cwnd' with '(now - hc->tx_lsndtime) / hc->tx_rto' value +can lead to undefined behavior [1]. + +In order to fix this use a gradual shift of the window with a 'while' +loop, similar to what tcp_cwnd_restart() is doing. + +When comparing delta and RTO there is a minor difference between TCP +and DCCP, the last one also invokes dccp_cwnd_restart() and reduces +'cwnd' if delta equals RTO. That case is preserved in this change. + +[1]: +[40850.963623] UBSAN: Undefined behaviour in net/dccp/ccids/ccid2.c:237:7 +[40851.043858] shift exponent 67 is too large for 32-bit type 'unsigned int' +[40851.127163] CPU: 3 PID: 15940 Comm: netstress Tainted: G W E 4.18.0-rc7.x86_64 #1 +... +[40851.377176] Call Trace: +[40851.408503] dump_stack+0xf1/0x17b +[40851.451331] ? show_regs_print_info+0x5/0x5 +[40851.503555] ubsan_epilogue+0x9/0x7c +[40851.548363] __ubsan_handle_shift_out_of_bounds+0x25b/0x2b4 +[40851.617109] ? __ubsan_handle_load_invalid_value+0x18f/0x18f +[40851.686796] ? xfrm4_output_finish+0x80/0x80 +[40851.739827] ? lock_downgrade+0x6d0/0x6d0 +[40851.789744] ? xfrm4_prepare_output+0x160/0x160 +[40851.845912] ? ip_queue_xmit+0x810/0x1db0 +[40851.895845] ? ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp] +[40851.963530] ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp] +[40852.029063] dccp_xmit_packet+0x1d3/0x720 [dccp] +[40852.086254] dccp_write_xmit+0x116/0x1d0 [dccp] +[40852.142412] dccp_sendmsg+0x428/0xb20 [dccp] +[40852.195454] ? inet_dccp_listen+0x200/0x200 [dccp] +[40852.254833] ? sched_clock+0x5/0x10 +[40852.298508] ? sched_clock+0x5/0x10 +[40852.342194] ? 
inet_create+0xdf0/0xdf0 +[40852.388988] sock_sendmsg+0xd9/0x160 +... + +Fixes: 113ced1f52e5 ("dccp ccid-2: Perform congestion-window validation") +Signed-off-by: Alexey Kodanev +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ccids/ccid2.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/dccp/ccids/ccid2.c ++++ b/net/dccp/ccids/ccid2.c +@@ -228,14 +228,16 @@ static void ccid2_cwnd_restart(struct so + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + u32 cwnd = hc->tx_cwnd, restart_cwnd, + iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); ++ s32 delta = now - hc->tx_lsndtime; + + hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); + + /* don't reduce cwnd below the initial window (IW) */ + restart_cwnd = min(cwnd, iwnd); +- cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; +- hc->tx_cwnd = max(cwnd, restart_cwnd); + ++ while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd) ++ cwnd >>= 1; ++ hc->tx_cwnd = max(cwnd, restart_cwnd); + hc->tx_cwnd_stamp = now; + hc->tx_cwnd_used = 0; + diff --git a/queue-4.17/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch b/queue-4.17/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch new file mode 100644 index 00000000000..38594510752 --- /dev/null +++ b/queue-4.17/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch @@ -0,0 +1,43 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Xin Long +Date: Sun, 5 Aug 2018 22:46:07 +0800 +Subject: ip6_tunnel: use the right value for ipv4 min mtu check in ip6_tnl_xmit + +From: Xin Long + +[ Upstream commit 82a40777de12728dedf4075453b694f0d1baee80 ] + +According to RFC791, 68 bytes is the minimum size of IPv4 datagram every +device must be able to forward without further fragmentation while 576 +bytes is the minimum size of IPv4 datagram every device has to be able +to receive, so in ip6_tnl_xmit(), 68(IPV4_MIN_MTU) should be the 
right +value for the ipv4 min mtu check in ip6_tnl_xmit. + +While at it, change to use max() instead of if statement. + +Fixes: c9fefa08190f ("ip6_tunnel: get the min mtu properly in ip6_tnl_xmit") +Reported-by: Sabrina Dubroca +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1133,12 +1133,8 @@ route_lookup: + max_headroom += 8; + mtu -= 8; + } +- if (skb->protocol == htons(ETH_P_IPV6)) { +- if (mtu < IPV6_MIN_MTU) +- mtu = IPV6_MIN_MTU; +- } else if (mtu < 576) { +- mtu = 576; +- } ++ mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? ++ IPV6_MIN_MTU : IPV4_MIN_MTU); + + skb_dst_update_pmtu(skb, mtu); + if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { diff --git a/queue-4.17/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch b/queue-4.17/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch new file mode 100644 index 00000000000..7f2fd90c344 --- /dev/null +++ b/queue-4.17/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch @@ -0,0 +1,95 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Wei Wang +Date: Fri, 10 Aug 2018 11:14:56 -0700 +Subject: l2tp: use sk_dst_check() to avoid race on sk->sk_dst_cache + +From: Wei Wang + +[ Upstream commit 6d37fa49da1e8db8fb1995be22ac837ca41ac8a8 ] + +In l2tp code, if it is a L2TP_UDP_ENCAP tunnel, tunnel->sk points to a +UDP socket. User could call sendmsg() on both this tunnel and the UDP +socket itself concurrently. As l2tp_xmit_skb() holds socket lock and call +__sk_dst_check() to refresh sk->sk_dst_cache, while udpv6_sendmsg() is +lockless and call sk_dst_check() to refresh sk->sk_dst_cache, there +could be a race and cause the dst cache to be freed multiple times. 
+So we fix l2tp side code to always call sk_dst_check() to guarantee +xchg() is called when refreshing sk->sk_dst_cache to avoid race +conditions. + +Syzkaller reported stack trace: +BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] +BUG: KASAN: use-after-free in atomic_fetch_add_unless include/linux/atomic.h:575 [inline] +BUG: KASAN: use-after-free in atomic_add_unless include/linux/atomic.h:597 [inline] +BUG: KASAN: use-after-free in dst_hold_safe include/net/dst.h:308 [inline] +BUG: KASAN: use-after-free in ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029 +Read of size 4 at addr ffff8801aea9a880 by task syz-executor129/4829 + +CPU: 0 PID: 4829 Comm: syz-executor129 Not tainted 4.18.0-rc7-next-20180802+ #30 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 + print_address_description+0x6c/0x20b mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.7+0x242/0x30d mm/kasan/report.c:412 + check_memory_region_inline mm/kasan/kasan.c:260 [inline] + check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 + kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272 + atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] + atomic_fetch_add_unless include/linux/atomic.h:575 [inline] + atomic_add_unless include/linux/atomic.h:597 [inline] + dst_hold_safe include/net/dst.h:308 [inline] + ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029 + rt6_get_pcpu_route net/ipv6/route.c:1249 [inline] + ip6_pol_route+0x354/0xd20 net/ipv6/route.c:1922 + ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2098 + fib6_rule_lookup+0x283/0x890 net/ipv6/fib6_rules.c:122 + ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2126 + ip6_dst_lookup_tail+0x1278/0x1da0 net/ipv6/ip6_output.c:978 + ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079 + 
ip6_sk_dst_lookup_flow+0x5ed/0xc50 net/ipv6/ip6_output.c:1117 + udpv6_sendmsg+0x2163/0x36b0 net/ipv6/udp.c:1354 + inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 + sock_sendmsg_nosec net/socket.c:622 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:632 + ___sys_sendmsg+0x51d/0x930 net/socket.c:2115 + __sys_sendmmsg+0x240/0x6f0 net/socket.c:2210 + __do_sys_sendmmsg net/socket.c:2239 [inline] + __se_sys_sendmmsg net/socket.c:2236 [inline] + __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2236 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x446a29 +Code: e8 ac b8 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f4de5532db8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 +RAX: ffffffffffffffda RBX: 00000000006dcc38 RCX: 0000000000446a29 +RDX: 00000000000000b8 RSI: 0000000020001b00 RDI: 0000000000000003 +RBP: 00000000006dcc30 R08: 00007f4de5533700 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dcc3c +R13: 00007ffe2b830fdf R14: 00007f4de55339c0 R15: 0000000000000001 + +Fixes: 71b1391a4128 ("l2tp: ensure sk->dst is still valid") +Reported-by: syzbot+05f840f3b04f211bad55@syzkaller.appspotmail.com +Signed-off-by: Wei Wang +Signed-off-by: Martin KaFai Lau +Cc: Guillaume Nault +Cc: David Ahern +Cc: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -1110,7 +1110,7 @@ int l2tp_xmit_skb(struct l2tp_session *s + + /* Get routing info from the tunnel socket */ + skb_dst_drop(skb); +- skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); ++ skb_dst_set(skb, sk_dst_check(sk, 0)); + + inet = inet_sk(sk); + fl = &inet->cork.fl; diff --git a/queue-4.17/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch b/queue-4.17/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch new file mode 100644 index 00000000000..81c750ad3c0 --- /dev/null +++ b/queue-4.17/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch @@ -0,0 +1,54 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Cong Wang +Date: Tue, 7 Aug 2018 12:41:38 -0700 +Subject: llc: use refcount_inc_not_zero() for llc_sap_find() + +From: Cong Wang + +[ Upstream commit 0dcb82254d65f72333aa50ad626d1e9665ad093b ] + +llc_sap_put() decreases the refcnt before deleting sap +from the global list. Therefore, there is a chance +llc_sap_find() could find a sap with zero refcnt +in this global list. + +Close this race condition by checking if refcnt is zero +or not in llc_sap_find(), if it is zero then it is being +removed so we can just treat it as gone. + +Reported-by: +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/llc.h | 5 +++++ + net/llc/llc_core.c | 4 ++-- + 2 files changed, 7 insertions(+), 2 deletions(-) + +--- a/include/net/llc.h ++++ b/include/net/llc.h +@@ -116,6 +116,11 @@ static inline void llc_sap_hold(struct l + refcount_inc(&sap->refcnt); + } + ++static inline bool llc_sap_hold_safe(struct llc_sap *sap) ++{ ++ return refcount_inc_not_zero(&sap->refcnt); ++} ++ + void llc_sap_close(struct llc_sap *sap); + + static inline void llc_sap_put(struct llc_sap *sap) +--- a/net/llc/llc_core.c ++++ b/net/llc/llc_core.c +@@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned ch + + rcu_read_lock_bh(); + sap = __llc_sap_find(sap_value); +- if (sap) +- llc_sap_hold(sap); ++ if (!sap || !llc_sap_hold_safe(sap)) ++ sap = NULL; + rcu_read_unlock_bh(); + return sap; + } diff --git a/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch b/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch new file mode 100644 index 00000000000..85d27a3ad04 --- /dev/null +++ b/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Nir Dotan +Date: Fri, 3 Aug 2018 15:57:43 +0300 +Subject: mlxsw: core_acl_flex_actions: Remove redundant counter destruction + +From: Nir Dotan + +[ Upstream commit 7cc6169493990dec488eda0a3f6612729ca25e81 ] + +Each tc flower rule uses a hidden count action. As counter resource may +not be available due to limited HW resources, update _counter_create() +and _counter_destroy() pair to follow previously introduced symmetric +error condition handling, add a call to mlxsw_afa_resource_del() as part +of the counter resource destruction. + +Fixes: c18c1e186ba8 ("mlxsw: core: Make counter index allocated inside the action append") +Signed-off-by: Nir Dotan +Reviewed-by: Petr Machata +Reviewed-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +@@ -584,6 +584,7 @@ static void + mlxsw_afa_counter_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_counter *counter) + { ++ mlxsw_afa_resource_del(&counter->resource); + block->afa->ops->counter_index_put(block->afa->ops_priv, + counter->counter_index); + kfree(counter); diff --git a/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch b/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch new file mode 100644 index 00000000000..24b74f36e92 --- /dev/null +++ b/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch @@ -0,0 +1,46 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Nir Dotan +Date: Fri, 3 Aug 2018 15:57:44 +0300 +Subject: mlxsw: core_acl_flex_actions: Remove redundant mirror resource destruction + +From: Nir Dotan + +[ Upstream commit caebd1b389708bf3d0465be829480fc706a68720 ] + +In previous patch mlxsw_afa_resource_del() was added to avoid a duplicate +resource destruction scenario. +For mirror actions, such duplicate destruction leads to a crash as in: + + # tc qdisc add dev swp49 ingress + # tc filter add dev swp49 parent ffff: \ + protocol ip chain 100 pref 10 \ + flower skip_sw dst_ip 192.168.101.1 action drop + # tc filter add dev swp49 parent ffff: \ + protocol ip pref 10 \ + flower skip_sw dst_ip 192.168.101.1 action goto chain 100 \ + action mirred egress mirror dev swp4 + +Therefore add a call to mlxsw_afa_resource_del() in +mlxsw_afa_mirror_destroy() in order to clear that resource +from rule's resources. 
+ +Fixes: d0d13c1858a1 ("mlxsw: spectrum_acl: Add support for mirror action") +Signed-off-by: Nir Dotan +Reviewed-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +@@ -862,6 +862,7 @@ static void + mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_mirror *mirror) + { ++ mlxsw_afa_resource_del(&mirror->resource); + block->afa->ops->mirror_del(block->afa->ops_priv, + mirror->local_in_port, + mirror->span_id, diff --git a/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch b/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch new file mode 100644 index 00000000000..0edba8dbec4 --- /dev/null +++ b/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch @@ -0,0 +1,72 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Nir Dotan +Date: Fri, 3 Aug 2018 15:57:42 +0300 +Subject: mlxsw: core_acl_flex_actions: Remove redundant resource destruction + +From: Nir Dotan + +[ Upstream commit dda0a3a3fb92451d4a922e56365ee1f73c8a9586 ] + +Some ACL actions require the allocation of a separate resource +prior to applying the action itself. When facing an error condition +during the setup phase of the action, resource should be destroyed. +For such actions the destruction was done twice which is dangerous +and lead to a potential crash. +The destruction took place first upon error on action setup phase +and then as the rule was destroyed. 
+ +The following sequence generated a crash: + + # tc qdisc add dev swp49 ingress + # tc filter add dev swp49 parent ffff: \ + protocol ip chain 100 pref 10 \ + flower skip_sw dst_ip 192.168.101.1 action drop + # tc filter add dev swp49 parent ffff: \ + protocol ip pref 10 \ + flower skip_sw dst_ip 192.168.101.1 action goto chain 100 \ + action mirred egress mirror dev swp4 + +Therefore add mlxsw_afa_resource_del() as a complement of +mlxsw_afa_resource_add() to add symmetry to resource_list membership +handling. Call this from mlxsw_afa_fwd_entry_ref_destroy() to make the +_fwd_entry_ref_create() and _fwd_entry_ref_destroy() pair of calls a +NOP. + +Fixes: 140ce421217e ("mlxsw: core: Convert fwd_entry_ref list to be generic per-block resource list") +Signed-off-by: Nir Dotan +Reviewed-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +@@ -327,12 +327,16 @@ static void mlxsw_afa_resource_add(struc + list_add(&resource->list, &block->resource_list); + } + ++static void mlxsw_afa_resource_del(struct mlxsw_afa_resource *resource) ++{ ++ list_del(&resource->list); ++} ++ + static void mlxsw_afa_resources_destroy(struct mlxsw_afa_block *block) + { + struct mlxsw_afa_resource *resource, *tmp; + + list_for_each_entry_safe(resource, tmp, &block->resource_list, list) { +- list_del(&resource->list); + resource->destructor(block, resource); + } + } +@@ -530,6 +534,7 @@ static void + mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref) + { ++ mlxsw_afa_resource_del(&fwd_entry_ref->resource); + mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry); + kfree(fwd_entry_ref); + } diff --git 
a/queue-4.17/mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch b/queue-4.17/mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch new file mode 100644 index 00000000000..1bdb3bbf12d --- /dev/null +++ b/queue-4.17/mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch @@ -0,0 +1,163 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Nir Dotan +Date: Fri, 3 Aug 2018 15:57:41 +0300 +Subject: mlxsw: core_acl_flex_actions: Return error for conflicting actions + +From: Nir Dotan + +[ Upstream commit 3757b255bf20ae3c941abae7624ff215bfd9ef05 ] + +Spectrum switch ACL action set is built in groups of three actions +which may point to additional actions. A group holds a single record +which can be set as goto record for pointing at a following group +or can be set to mark the termination of the lookup. This is perfectly +adequate for handling a series of actions to be executed on a packet. +While the SW model allows configuration of conflicting actions +where it is clear that some actions will never execute, the mlxsw +driver must block such configurations as it creates a conflict +over the single terminate/goto record value. + +For a conflicting actions configuration such as: + + # tc filter add dev swp49 parent ffff: \ + protocol ip pref 10 \ + flower skip_sw dst_ip 192.168.101.1 \ + action goto chain 100 \ + action mirred egress mirror dev swp4 + +Where it is clear that the last action will never execute, the +mlxsw driver was issuing a warning instead of returning an error. +Therefore replace that warning with an error for this specific +case. + +Fixes: 4cda7d8d7098 ("mlxsw: core: Introduce flexible actions support") +Signed-off-by: Nir Dotan +Reviewed-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 42 ++++++------ + 1 file changed, 21 insertions(+), 21 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +@@ -626,8 +626,8 @@ static char *mlxsw_afa_block_append_acti + char *oneact; + char *actions; + +- if (WARN_ON(block->finished)) +- return NULL; ++ if (block->finished) ++ return ERR_PTR(-EINVAL); + if (block->cur_act_index + action_size > + block->afa->max_acts_per_set) { + struct mlxsw_afa_set *set; +@@ -637,7 +637,7 @@ static char *mlxsw_afa_block_append_acti + */ + set = mlxsw_afa_set_create(false); + if (!set) +- return NULL; ++ return ERR_PTR(-ENOBUFS); + set->prev = block->cur_set; + block->cur_act_index = 0; + block->cur_set->next = set; +@@ -724,8 +724,8 @@ int mlxsw_afa_block_append_vlan_modify(s + MLXSW_AFA_VLAN_CODE, + MLXSW_AFA_VLAN_SIZE); + +- if (!act) +- return -ENOBUFS; ++ if (IS_ERR(act)) ++ return PTR_ERR(act); + mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP, + MLXSW_AFA_VLAN_CMD_SET_OUTER, vid, + MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp, +@@ -806,8 +806,8 @@ int mlxsw_afa_block_append_drop(struct m + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + +- if (!act) +- return -ENOBUFS; ++ if (IS_ERR(act)) ++ return PTR_ERR(act); + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 0); + return 0; +@@ -820,8 +820,8 @@ int mlxsw_afa_block_append_trap(struct m + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + +- if (!act) +- return -ENOBUFS; ++ if (IS_ERR(act)) ++ return PTR_ERR(act); + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, + trap_id); +@@ -836,8 +836,8 @@ int mlxsw_afa_block_append_trap_and_forw + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + +- if (!act) +- return -ENOBUFS; ++ if 
(IS_ERR(act)) ++ return PTR_ERR(act); + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, + trap_id); +@@ -908,8 +908,8 @@ mlxsw_afa_block_append_allocated_mirror( + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); +- if (!act) +- return -ENOBUFS; ++ if (IS_ERR(act)) ++ return PTR_ERR(act); + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 0); + mlxsw_afa_trapdisc_mirror_pack(act, true, mirror_agent); +@@ -996,8 +996,8 @@ int mlxsw_afa_block_append_fwd(struct ml + + act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE, + MLXSW_AFA_FORWARD_SIZE); +- if (!act) { +- err = -ENOBUFS; ++ if (IS_ERR(act)) { ++ err = PTR_ERR(act); + goto err_append_action; + } + mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_PBS, +@@ -1052,8 +1052,8 @@ int mlxsw_afa_block_append_allocated_cou + { + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_POLCNT_CODE, + MLXSW_AFA_POLCNT_SIZE); +- if (!act) +- return -ENOBUFS; ++ if (IS_ERR(act)) ++ return PTR_ERR(act); + mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES, + counter_index); + return 0; +@@ -1123,8 +1123,8 @@ int mlxsw_afa_block_append_fid_set(struc + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_VIRFWD_CODE, + MLXSW_AFA_VIRFWD_SIZE); +- if (!act) +- return -ENOBUFS; ++ if (IS_ERR(act)) ++ return PTR_ERR(act); + mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid); + return 0; + } +@@ -1193,8 +1193,8 @@ int mlxsw_afa_block_append_mcrouter(stru + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_MCROUTER_CODE, + MLXSW_AFA_MCROUTER_SIZE); +- if (!act) +- return -ENOBUFS; ++ if (IS_ERR(act)) ++ return PTR_ERR(act); + mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, + expected_irif, min_mtu, rmid_valid, kvdl_index); + return 0; diff --git 
a/queue-4.17/net-aquantia-fix-iff_allmulti-flag-functionality.patch b/queue-4.17/net-aquantia-fix-iff_allmulti-flag-functionality.patch new file mode 100644 index 00000000000..5e34fcd25a0 --- /dev/null +++ b/queue-4.17/net-aquantia-fix-iff_allmulti-flag-functionality.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Dmitry Bogdanov +Date: Wed, 8 Aug 2018 14:06:32 +0300 +Subject: net: aquantia: Fix IFF_ALLMULTI flag functionality + +From: Dmitry Bogdanov + +[ Upstream commit 11ba961c916127651e12af6cad3891f8aeb25aa9 ] + +It was noticed that the NIC always passes all multicast traffic to the host +regardless of the IFF_ALLMULTI flag on the interface. +The rule in the MC Filter Table in the NIC, that is configured to accept any +multicast packets, is turned on if the IFF_MULTICAST flag is set on the +interface. It leads to passing all multicast traffic to the host. +This fix changes the condition to turn on that rule by checking the +IFF_ALLMULTI flag as it should. + +Fixes: b21f502f84be ("net:ethernet:aquantia: Fix for multicast filter handling.") +Signed-off-by: Dmitry Bogdanov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c ++++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +@@ -762,7 +762,7 @@ static int hw_atl_b0_hw_packet_filter_se + + hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC)); + hw_atl_rpfl2multicast_flr_en_set(self, +- IS_FILTER_ENABLED(IFF_MULTICAST), 0); ++ IS_FILTER_ENABLED(IFF_ALLMULTI), 0); + + hw_atl_rpfl2_accept_all_mc_packets_set(self, + IS_FILTER_ENABLED(IFF_ALLMULTI)); diff --git a/queue-4.17/net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch b/queue-4.17/net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch new file mode 100644 index 00000000000..86c8fabc943 --- /dev/null +++ b/queue-4.17/net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch @@ -0,0 +1,46 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Or Gerlitz +Date: Wed, 8 Aug 2018 15:48:07 -0700 +Subject: net/mlx5e: Properly check if hairpin is possible between two functions + +From: Or Gerlitz + +[ Upstream commit 816f670623692b5da2787f278cbfdb331ed29b8a ] + +The current check relies on function BDF addresses and can get +us wrong e.g when two VFs are assigned into a VM and the PCI +v-address is set by the hypervisor. + +Fixes: 5c65c564c962 ('net/mlx5e: Support offloading TC NIC hairpin flows') +Signed-off-by: Or Gerlitz +Reported-by: Alaa Hleihel +Tested-by: Alaa Hleihel +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -1907,15 +1907,15 @@ static bool actions_match_supported(stru + static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) + { + struct mlx5_core_dev *fmdev, *pmdev; +- u16 func_id, peer_id; ++ u64 fsystem_guid, psystem_guid; + + fmdev = priv->mdev; + pmdev = peer_priv->mdev; + +- func_id = (u16)((fmdev->pdev->bus->number << 8) | PCI_SLOT(fmdev->pdev->devfn)); +- peer_id = (u16)((pmdev->pdev->bus->number << 8) | PCI_SLOT(pmdev->pdev->devfn)); ++ mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid); ++ mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid); + +- return (func_id == peer_id); ++ return (fsystem_guid == psystem_guid); + } + + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, diff --git a/queue-4.17/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch b/queue-4.17/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch new file mode 100644 index 00000000000..f76d32fd12d --- /dev/null +++ b/queue-4.17/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch @@ -0,0 +1,84 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Hangbin Liu +Date: Mon, 13 Aug 2018 18:44:04 +0800 +Subject: net_sched: Fix missing res info when create new tc_index filter + +From: Hangbin Liu + +[ Upstream commit 008369dcc5f7bfba526c98054f8525322acf0ea3 ] + +Li Shuang reported the following warn: + +[ 733.484610] WARNING: CPU: 6 PID: 21123 at net/sched/sch_cbq.c:1418 cbq_destroy_class+0x5d/0x70 [sch_cbq] +[ 733.495190] Modules linked in: sch_cbq cls_tcindex sch_dsmark rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat l +[ 
733.574155] syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm igb ixgbe ahci libahci i2c_algo_bit libata i40e i2c_core dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod +[ 733.592500] CPU: 6 PID: 21123 Comm: tc Not tainted 4.18.0-rc8.latest+ #131 +[ 733.600169] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016 +[ 733.608518] RIP: 0010:cbq_destroy_class+0x5d/0x70 [sch_cbq] +[ 733.614734] Code: e7 d9 d2 48 8b 7b 48 e8 61 05 da d2 48 8d bb f8 00 00 00 e8 75 ae d5 d2 48 39 eb 74 0a 48 89 df 5b 5d e9 16 6c 94 d2 5b 5d c3 <0f> 0b eb b6 0f 1f 44 00 00 66 2e 0f 1f 84 +[ 733.635798] RSP: 0018:ffffbfbb066bb9d8 EFLAGS: 00010202 +[ 733.641627] RAX: 0000000000000001 RBX: ffff9cdd17392800 RCX: 000000008010000f +[ 733.649588] RDX: ffff9cdd1df547e0 RSI: ffff9cdd17392800 RDI: ffff9cdd0f84c800 +[ 733.657547] RBP: ffff9cdd0f84c800 R08: 0000000000000001 R09: 0000000000000000 +[ 733.665508] R10: ffff9cdd0f84d000 R11: 0000000000000001 R12: 0000000000000001 +[ 733.673469] R13: 0000000000000000 R14: 0000000000000001 R15: ffff9cdd17392200 +[ 733.681430] FS: 00007f911890a740(0000) GS:ffff9cdd1f8c0000(0000) knlGS:0000000000000000 +[ 733.690456] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 733.696864] CR2: 0000000000b5544c CR3: 0000000859374002 CR4: 00000000001606e0 +[ 733.704826] Call Trace: +[ 733.707554] cbq_destroy+0xa1/0xd0 [sch_cbq] +[ 733.712318] qdisc_destroy+0x62/0x130 +[ 733.716401] dsmark_destroy+0x2a/0x70 [sch_dsmark] +[ 733.721745] qdisc_destroy+0x62/0x130 +[ 733.725829] qdisc_graft+0x3ba/0x470 +[ 733.729817] tc_get_qdisc+0x2a6/0x2c0 +[ 733.733901] ? cred_has_capability+0x7d/0x130 +[ 733.738761] rtnetlink_rcv_msg+0x263/0x2d0 +[ 733.743330] ? rtnl_calcit.isra.30+0x110/0x110 +[ 733.748287] netlink_rcv_skb+0x4d/0x130 +[ 733.752576] netlink_unicast+0x1a3/0x250 +[ 733.756949] netlink_sendmsg+0x2ae/0x3a0 +[ 733.761324] sock_sendmsg+0x36/0x40 +[ 733.765213] ___sys_sendmsg+0x26f/0x2d0 +[ 733.769493] ? handle_pte_fault+0x586/0xdf0 +[ 733.774158] ? 
__handle_mm_fault+0x389/0x500 +[ 733.778919] ? __sys_sendmsg+0x5e/0xa0 +[ 733.783099] __sys_sendmsg+0x5e/0xa0 +[ 733.787087] do_syscall_64+0x5b/0x180 +[ 733.791171] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 733.796805] RIP: 0033:0x7f9117f23f10 +[ 733.800791] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 +[ 733.821873] RSP: 002b:00007ffe96818398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +[ 733.830319] RAX: ffffffffffffffda RBX: 000000005b71244c RCX: 00007f9117f23f10 +[ 733.838280] RDX: 0000000000000000 RSI: 00007ffe968183e0 RDI: 0000000000000003 +[ 733.846241] RBP: 00007ffe968183e0 R08: 000000000000ffff R09: 0000000000000003 +[ 733.854202] R10: 00007ffe96817e20 R11: 0000000000000246 R12: 0000000000000000 +[ 733.862161] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000 +[ 733.870121] ---[ end trace 28edd4aad712ddca ]--- + +This is because we didn't update f->result.res when create new filter. Then in +tcindex_delete() -> tcf_unbind_filter(), we will failed to find out the res +and unbind filter, which will trigger the WARN_ON() in cbq_destroy_class(). + +Fix it by updating f->result.res when create new filter. + +Fixes: 6e0565697a106 ("net_sched: fix another crash in cls_tcindex") +Reported-by: Li Shuang +Signed-off-by: Hangbin Liu +Acked-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_tcindex.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -486,6 +486,7 @@ tcindex_set_parms(struct net *net, struc + struct tcindex_filter *nfp; + struct tcindex_filter __rcu **fp; + ++ f->result.res = r->res; + tcf_exts_change(&f->result.exts, &r->exts); + + fp = cp->h + (handle % cp->hash); diff --git a/queue-4.17/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch b/queue-4.17/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch new file mode 100644 index 00000000000..3bbe053e22a --- /dev/null +++ b/queue-4.17/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch @@ -0,0 +1,120 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Hangbin Liu +Date: Mon, 13 Aug 2018 18:44:03 +0800 +Subject: net_sched: fix NULL pointer dereference when delete tcindex filter + +From: Hangbin Liu + +[ Upstream commit 2df8bee5654bb2b7312662ca6810d4dc16b0b67f ] + +Li Shuang reported the following crash: + +[ 71.267724] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 +[ 71.276456] PGD 800000085d9bd067 P4D 800000085d9bd067 PUD 859a0b067 PMD 0 +[ 71.284127] Oops: 0000 [#1] SMP PTI +[ 71.288015] CPU: 12 PID: 2386 Comm: tc Not tainted 4.18.0-rc8.latest+ #131 +[ 71.295686] Hardware name: Dell Inc. 
PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016 +[ 71.304037] RIP: 0010:tcindex_delete+0x72/0x280 [cls_tcindex] +[ 71.310446] Code: 00 31 f6 48 87 75 20 48 85 f6 74 11 48 8b 47 18 48 8b 40 08 48 8b 40 50 e8 fb a6 f8 fc 48 85 db 0f 84 dc 00 00 00 48 8b 73 18 <8b> 56 04 48 8d 7e 04 85 d2 0f 84 7b 01 00 +[ 71.331517] RSP: 0018:ffffb45207b3f898 EFLAGS: 00010282 +[ 71.337345] RAX: ffff8ad3d72d6360 RBX: ffff8acc84393680 RCX: 000000000000002e +[ 71.345306] RDX: ffff8ad3d72c8570 RSI: 0000000000000000 RDI: ffff8ad847a45800 +[ 71.353277] RBP: ffff8acc84393688 R08: ffff8ad3d72c8400 R09: 0000000000000000 +[ 71.361238] R10: ffff8ad3de786e00 R11: 0000000000000000 R12: ffffb45207b3f8c7 +[ 71.369199] R13: ffff8ad3d93bd2a0 R14: 000000000000002e R15: ffff8ad3d72c9600 +[ 71.377161] FS: 00007f9d3ec3e740(0000) GS:ffff8ad3df980000(0000) knlGS:0000000000000000 +[ 71.386188] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 71.392597] CR2: 0000000000000004 CR3: 0000000852f06003 CR4: 00000000001606e0 +[ 71.400558] Call Trace: +[ 71.403299] tcindex_destroy_element+0x25/0x40 [cls_tcindex] +[ 71.409611] tcindex_walk+0xbb/0x110 [cls_tcindex] +[ 71.414953] tcindex_destroy+0x44/0x90 [cls_tcindex] +[ 71.420492] ? tcindex_delete+0x280/0x280 [cls_tcindex] +[ 71.426323] tcf_proto_destroy+0x16/0x40 +[ 71.430696] tcf_chain_flush+0x51/0x70 +[ 71.434876] tcf_block_put_ext.part.30+0x8f/0x1b0 +[ 71.440122] tcf_block_put+0x4d/0x70 +[ 71.444108] cbq_destroy+0x4d/0xd0 [sch_cbq] +[ 71.448869] qdisc_destroy+0x62/0x130 +[ 71.452951] dsmark_destroy+0x2a/0x70 [sch_dsmark] +[ 71.458300] qdisc_destroy+0x62/0x130 +[ 71.462373] qdisc_graft+0x3ba/0x470 +[ 71.466359] tc_get_qdisc+0x2a6/0x2c0 +[ 71.470443] ? cred_has_capability+0x7d/0x130 +[ 71.475307] rtnetlink_rcv_msg+0x263/0x2d0 +[ 71.479875] ? 
rtnl_calcit.isra.30+0x110/0x110 +[ 71.484832] netlink_rcv_skb+0x4d/0x130 +[ 71.489109] netlink_unicast+0x1a3/0x250 +[ 71.493482] netlink_sendmsg+0x2ae/0x3a0 +[ 71.497859] sock_sendmsg+0x36/0x40 +[ 71.501748] ___sys_sendmsg+0x26f/0x2d0 +[ 71.506029] ? handle_pte_fault+0x586/0xdf0 +[ 71.510694] ? __handle_mm_fault+0x389/0x500 +[ 71.515457] ? __sys_sendmsg+0x5e/0xa0 +[ 71.519636] __sys_sendmsg+0x5e/0xa0 +[ 71.523626] do_syscall_64+0x5b/0x180 +[ 71.527711] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 71.533345] RIP: 0033:0x7f9d3e257f10 +[ 71.537331] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 +[ 71.558401] RSP: 002b:00007fff6f893398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +[ 71.566848] RAX: ffffffffffffffda RBX: 000000005b71274d RCX: 00007f9d3e257f10 +[ 71.574810] RDX: 0000000000000000 RSI: 00007fff6f8933e0 RDI: 0000000000000003 +[ 71.582770] RBP: 00007fff6f8933e0 R08: 000000000000ffff R09: 0000000000000003 +[ 71.590729] R10: 00007fff6f892e20 R11: 0000000000000246 R12: 0000000000000000 +[ 71.598689] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000 +[ 71.606651] Modules linked in: sch_cbq cls_tcindex sch_dsmark xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_coni +[ 71.685425] libahci i2c_algo_bit i2c_core i40e libata dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod +[ 71.697075] CR2: 0000000000000004 +[ 71.700792] ---[ end trace f604eb1acacd978b ]--- + +Reproducer: +tc qdisc add dev lo handle 1:0 root dsmark indices 64 set_tc_index +tc filter add dev lo parent 1:0 protocol ip prio 1 tcindex mask 0xfc shift 2 +tc qdisc add dev lo parent 1:0 handle 2:0 cbq bandwidth 10Mbit cell 8 avpkt 1000 mpu 64 +tc class add dev lo parent 2:0 classid 2:1 cbq bandwidth 10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated allot 1514 weight 1 maxburst 
10 +tc filter add dev lo parent 2:0 protocol ip prio 1 handle 0x2e tcindex classid 2:1 pass_on +tc qdisc add dev lo parent 2:1 pfifo limit 5 +tc qdisc del dev lo root + +This is because in tcindex_set_parms, when there is no old_r, we set new +exts to cr.exts. And we didn't set it to filter when r == &new_filter_result. + +Then in tcindex_delete() -> tcf_exts_get_net(), we will get NULL pointer +dereference as we didn't init exts. + +Fix it by moving tcf_exts_change() after "if (old_r && old_r != r)" check. +Then we don't need "cr" as there is no errout after that. + +Fixes: bf63ac73b3e13 ("net_sched: fix an oops in tcindex filter") +Reported-by: Li Shuang +Signed-off-by: Hangbin Liu +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_tcindex.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -468,11 +468,6 @@ tcindex_set_parms(struct net *net, struc + tcf_bind_filter(tp, &cr.res, base); + } + +- if (old_r) +- tcf_exts_change(&r->exts, &e); +- else +- tcf_exts_change(&cr.exts, &e); +- + if (old_r && old_r != r) { + err = tcindex_filter_result_init(old_r); + if (err < 0) { +@@ -483,6 +478,8 @@ tcindex_set_parms(struct net *net, struc + + oldp = p; + r->res = cr.res; ++ tcf_exts_change(&r->exts, &e); ++ + rcu_assign_pointer(tp->root, cp); + + if (r == &new_filter_result) { diff --git a/queue-4.17/r8169-don-t-use-msi-x-on-rtl8168g.patch b/queue-4.17/r8169-don-t-use-msi-x-on-rtl8168g.patch new file mode 100644 index 00000000000..8c91c476a1a --- /dev/null +++ b/queue-4.17/r8169-don-t-use-msi-x-on-rtl8168g.patch @@ -0,0 +1,42 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Heiner Kallweit +Date: Sun, 12 Aug 2018 13:26:26 +0200 +Subject: r8169: don't use MSI-X on RTL8168g + +From: Heiner Kallweit + +[ Upstream commit 7c53a722459c1d6ffb0f5b2058c06ca8980b8600 ] + +There have been two reports that network doesn't come back on resume 
+from suspend when using MSI-X. Both cases affect the same chip version +(RTL8168g - version 40), on different systems. Falling back to MSI +fixes the issue. +Even though we don't really have a proof yet that the network chip +version is to blame, let's disable MSI-X for this version. + +Reported-by: Steve Dodd +Reported-by: Lou Reed +Tested-by: Steve Dodd +Tested-by: Lou Reed +Fixes: 6c6aa15fdea5 ("r8169: improve interrupt handling") +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -8066,6 +8066,11 @@ static int rtl_alloc_irq(struct rtl8169_ + RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable); + RTL_W8(tp, Cfg9346, Cfg9346_Lock); + flags = PCI_IRQ_LEGACY; ++ } else if (tp->mac_version == RTL_GIGA_MAC_VER_40) { ++ /* This version was reported to have issues with resume ++ * from suspend when using MSI-X ++ */ ++ flags = PCI_IRQ_LEGACY | PCI_IRQ_MSI; + } else { + flags = PCI_IRQ_ALL_TYPES; + } diff --git a/queue-4.17/rxrpc-fix-the-keepalive-generator.patch b/queue-4.17/rxrpc-fix-the-keepalive-generator.patch new file mode 100644 index 00000000000..878bf6d9e55 --- /dev/null +++ b/queue-4.17/rxrpc-fix-the-keepalive-generator.patch @@ -0,0 +1,454 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: David Howells +Date: Wed, 8 Aug 2018 11:30:02 +0100 +Subject: rxrpc: Fix the keepalive generator [ver #2] + +From: David Howells + +[ Upstream commit 330bdcfadceea5e9a1526d731711e163f9a90975 ] + +AF_RXRPC has a keepalive message generator that generates a message for a +peer ~20s after the last transmission to that peer to keep firewall ports +open. The implementation is incorrect in the following ways: + + (1) It mixes up ktime_t and time64_t types. 
+ + (2) It uses ktime_get_real(), the output of which may jump forward or + backward due to adjustments to the time of day. + + (3) If the current time jumps forward too much or jumps backwards, the + generator function will crank the base of the time ring round one slot + at a time (ie. a 1s period) until it catches up, spewing out VERSION + packets as it goes. + +Fix the problem by: + + (1) Only using time64_t. There's no need for sub-second resolution. + + (2) Use ktime_get_seconds() rather than ktime_get_real() so that time + isn't perceived to go backwards. + + (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two + parts: + + (a) The "worker" function that manages the buckets and the timer. + + (b) The "dispatch" function that takes the pending peers and + potentially transmits a keepalive packet before putting them back + in the ring into the slot appropriate to the revised last-Tx time. + + (4) Taking everything that's pending out of the ring and splicing it into + a temporary collector list for processing. + + In the case that there's been a significant jump forward, the ring + gets entirely emptied and then the time base can be warped forward + before the peers are processed. + + The warping can't happen if the ring isn't empty because the slot a + peer is in is keepalive-time dependent, relative to the base time. + + (5) Limit the number of iterations of the bucket array when scanning it. + + (6) Set the timer to skip any empty slots as there's no point waking up if + there's nothing to do yet. + +This can be triggered by an incoming call from a server after a reboot with +AF_RXRPC and AFS built into the kernel causing a peer record to be set up +before userspace is started. The system clock is then adjusted by +userspace, thereby potentially causing the keepalive generator to have a +meltdown - which leads to a message like: + + watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] + ... 
+ Workqueue: krxrpcd rxrpc_peer_keepalive_worker + EIP: lock_acquire+0x69/0x80 + ... + Call Trace: + ? rxrpc_peer_keepalive_worker+0x5e/0x350 + ? _raw_spin_lock_bh+0x29/0x60 + ? rxrpc_peer_keepalive_worker+0x5e/0x350 + ? rxrpc_peer_keepalive_worker+0x5e/0x350 + ? __lock_acquire+0x3d3/0x870 + ? process_one_work+0x110/0x340 + ? process_one_work+0x166/0x340 + ? process_one_work+0x110/0x340 + ? worker_thread+0x39/0x3c0 + ? kthread+0xdb/0x110 + ? cancel_delayed_work+0x90/0x90 + ? kthread_stop+0x70/0x70 + ? ret_from_fork+0x19/0x24 + +Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") +Reported-by: kernel test robot +Signed-off-by: David Howells +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/ar-internal.h | 8 +- + net/rxrpc/conn_event.c | 4 - + net/rxrpc/net_ns.c | 6 - + net/rxrpc/output.c | 12 +-- + net/rxrpc/peer_event.c | 162 ++++++++++++++++++++++++++---------------------- + net/rxrpc/peer_object.c | 8 +- + net/rxrpc/rxkad.c | 4 - + 7 files changed, 112 insertions(+), 92 deletions(-) + +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -104,9 +104,9 @@ struct rxrpc_net { + + #define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */ + u8 peer_keepalive_cursor; +- ktime_t peer_keepalive_base; +- struct hlist_head peer_keepalive[RXRPC_KEEPALIVE_TIME + 1]; +- struct hlist_head peer_keepalive_new; ++ time64_t peer_keepalive_base; ++ struct list_head peer_keepalive[32]; ++ struct list_head peer_keepalive_new; + struct timer_list peer_keepalive_timer; + struct work_struct peer_keepalive_work; + }; +@@ -295,7 +295,7 @@ struct rxrpc_peer { + struct hlist_head error_targets; /* targets for net error distribution */ + struct work_struct error_distributor; + struct rb_root service_conns; /* Service connections */ +- struct hlist_node keepalive_link; /* Link in net->peer_keepalive[] */ ++ struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ + time64_t last_tx_at; /* Last time packet sent here */ 
+ seqlock_t service_conn_lock; + spinlock_t lock; /* access lock */ +--- a/net/rxrpc/conn_event.c ++++ b/net/rxrpc/conn_event.c +@@ -136,7 +136,7 @@ static void rxrpc_conn_retransmit_call(s + } + + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); +- conn->params.peer->last_tx_at = ktime_get_real(); ++ conn->params.peer->last_tx_at = ktime_get_seconds(); + if (ret < 0) + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_fail_call_final_resend); +@@ -245,7 +245,7 @@ static int rxrpc_abort_connection(struct + return -EAGAIN; + } + +- conn->params.peer->last_tx_at = ktime_get_real(); ++ conn->params.peer->last_tx_at = ktime_get_seconds(); + + _leave(" = 0"); + return 0; +--- a/net/rxrpc/net_ns.c ++++ b/net/rxrpc/net_ns.c +@@ -85,12 +85,12 @@ static __net_init int rxrpc_init_net(str + hash_init(rxnet->peer_hash); + spin_lock_init(&rxnet->peer_hash_lock); + for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++) +- INIT_HLIST_HEAD(&rxnet->peer_keepalive[i]); +- INIT_HLIST_HEAD(&rxnet->peer_keepalive_new); ++ INIT_LIST_HEAD(&rxnet->peer_keepalive[i]); ++ INIT_LIST_HEAD(&rxnet->peer_keepalive_new); + timer_setup(&rxnet->peer_keepalive_timer, + rxrpc_peer_keepalive_timeout, 0); + INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker); +- rxnet->peer_keepalive_base = ktime_add(ktime_get_real(), NSEC_PER_SEC); ++ rxnet->peer_keepalive_base = ktime_get_seconds(); + + ret = -ENOMEM; + rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net); +--- a/net/rxrpc/output.c ++++ b/net/rxrpc/output.c +@@ -209,7 +209,7 @@ int rxrpc_send_ack_packet(struct rxrpc_c + now = ktime_get_real(); + if (ping) + call->ping_time = now; +- conn->params.peer->last_tx_at = ktime_get_real(); ++ conn->params.peer->last_tx_at = ktime_get_seconds(); + if (ret < 0) + trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_tx_fail_call_ack); +@@ -296,7 +296,7 @@ int rxrpc_send_abort_packet(struct rxrpc + + ret = kernel_sendmsg(conn->params.local->socket, + 
&msg, iov, 1, sizeof(pkt)); +- conn->params.peer->last_tx_at = ktime_get_real(); ++ conn->params.peer->last_tx_at = ktime_get_seconds(); + if (ret < 0) + trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_tx_fail_call_abort); +@@ -391,7 +391,7 @@ int rxrpc_send_data_packet(struct rxrpc_ + * message and update the peer record + */ + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); +- conn->params.peer->last_tx_at = ktime_get_real(); ++ conn->params.peer->last_tx_at = ktime_get_seconds(); + + up_read(&conn->params.local->defrag_sem); + if (ret < 0) +@@ -457,7 +457,7 @@ send_fragmentable: + if (ret == 0) { + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 2, len); +- conn->params.peer->last_tx_at = ktime_get_real(); ++ conn->params.peer->last_tx_at = ktime_get_seconds(); + + opt = IP_PMTUDISC_DO; + kernel_setsockopt(conn->params.local->socket, SOL_IP, +@@ -475,7 +475,7 @@ send_fragmentable: + if (ret == 0) { + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 2, len); +- conn->params.peer->last_tx_at = ktime_get_real(); ++ conn->params.peer->last_tx_at = ktime_get_seconds(); + + opt = IPV6_PMTUDISC_DO; + kernel_setsockopt(conn->params.local->socket, +@@ -599,6 +599,6 @@ void rxrpc_send_keepalive(struct rxrpc_p + trace_rxrpc_tx_fail(peer->debug_id, 0, ret, + rxrpc_tx_fail_version_keepalive); + +- peer->last_tx_at = ktime_get_real(); ++ peer->last_tx_at = ktime_get_seconds(); + _leave(""); + } +--- a/net/rxrpc/peer_event.c ++++ b/net/rxrpc/peer_event.c +@@ -350,97 +350,117 @@ void rxrpc_peer_add_rtt(struct rxrpc_cal + } + + /* +- * Perform keep-alive pings with VERSION packets to keep any NAT alive. ++ * Perform keep-alive pings. 
+ */ +-void rxrpc_peer_keepalive_worker(struct work_struct *work) ++static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, ++ struct list_head *collector, ++ time64_t base, ++ u8 cursor) + { +- struct rxrpc_net *rxnet = +- container_of(work, struct rxrpc_net, peer_keepalive_work); + struct rxrpc_peer *peer; +- unsigned long delay; +- ktime_t base, now = ktime_get_real(); +- s64 diff; +- u8 cursor, slot; ++ const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1; ++ time64_t keepalive_at; ++ int slot; + +- base = rxnet->peer_keepalive_base; +- cursor = rxnet->peer_keepalive_cursor; +- +- _enter("%u,%lld", cursor, ktime_sub(now, base)); ++ spin_lock_bh(&rxnet->peer_hash_lock); + +-next_bucket: +- diff = ktime_to_ns(ktime_sub(now, base)); +- if (diff < 0) +- goto resched; ++ while (!list_empty(collector)) { ++ peer = list_entry(collector->next, ++ struct rxrpc_peer, keepalive_link); ++ ++ list_del_init(&peer->keepalive_link); ++ if (!rxrpc_get_peer_maybe(peer)) ++ continue; + +- _debug("at %u", cursor); +- spin_lock_bh(&rxnet->peer_hash_lock); +-next_peer: +- if (!rxnet->live) { + spin_unlock_bh(&rxnet->peer_hash_lock); +- goto out; +- } + +- /* Everything in the bucket at the cursor is processed this second; the +- * bucket at cursor + 1 goes now + 1s and so on... 
+- */ +- if (hlist_empty(&rxnet->peer_keepalive[cursor])) { +- if (hlist_empty(&rxnet->peer_keepalive_new)) { +- spin_unlock_bh(&rxnet->peer_hash_lock); +- goto emptied_bucket; ++ keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; ++ slot = keepalive_at - base; ++ _debug("%02x peer %u t=%d {%pISp}", ++ cursor, peer->debug_id, slot, &peer->srx.transport); ++ ++ if (keepalive_at <= base || ++ keepalive_at > base + RXRPC_KEEPALIVE_TIME) { ++ rxrpc_send_keepalive(peer); ++ slot = RXRPC_KEEPALIVE_TIME; + } + +- hlist_move_list(&rxnet->peer_keepalive_new, +- &rxnet->peer_keepalive[cursor]); ++ /* A transmission to this peer occurred since last we examined ++ * it so put it into the appropriate future bucket. ++ */ ++ slot += cursor; ++ slot &= mask; ++ spin_lock_bh(&rxnet->peer_hash_lock); ++ list_add_tail(&peer->keepalive_link, ++ &rxnet->peer_keepalive[slot & mask]); ++ rxrpc_put_peer(peer); + } + +- peer = hlist_entry(rxnet->peer_keepalive[cursor].first, +- struct rxrpc_peer, keepalive_link); +- hlist_del_init(&peer->keepalive_link); +- if (!rxrpc_get_peer_maybe(peer)) +- goto next_peer; +- + spin_unlock_bh(&rxnet->peer_hash_lock); ++} ++ ++/* ++ * Perform keep-alive pings with VERSION packets to keep any NAT alive. 
++ */ ++void rxrpc_peer_keepalive_worker(struct work_struct *work) ++{ ++ struct rxrpc_net *rxnet = ++ container_of(work, struct rxrpc_net, peer_keepalive_work); ++ const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1; ++ time64_t base, now, delay; ++ u8 cursor, stop; ++ LIST_HEAD(collector); + +- _debug("peer %u {%pISp}", peer->debug_id, &peer->srx.transport); ++ now = ktime_get_seconds(); ++ base = rxnet->peer_keepalive_base; ++ cursor = rxnet->peer_keepalive_cursor; ++ _enter("%lld,%u", base - now, cursor); + +-recalc: +- diff = ktime_divns(ktime_sub(peer->last_tx_at, base), NSEC_PER_SEC); +- if (diff < -30 || diff > 30) +- goto send; /* LSW of 64-bit time probably wrapped on 32-bit */ +- diff += RXRPC_KEEPALIVE_TIME - 1; +- if (diff < 0) +- goto send; +- +- slot = (diff > RXRPC_KEEPALIVE_TIME - 1) ? RXRPC_KEEPALIVE_TIME - 1 : diff; +- if (slot == 0) +- goto send; ++ if (!rxnet->live) ++ return; + +- /* A transmission to this peer occurred since last we examined it so +- * put it into the appropriate future bucket. ++ /* Remove to a temporary list all the peers that are currently lodged ++ * in expired buckets plus all new peers. ++ * ++ * Everything in the bucket at the cursor is processed this ++ * second; the bucket at cursor + 1 goes at now + 1s and so ++ * on... 
+ */ +- slot = (slot + cursor) % ARRAY_SIZE(rxnet->peer_keepalive); + spin_lock_bh(&rxnet->peer_hash_lock); +- hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive[slot]); +- rxrpc_put_peer(peer); +- goto next_peer; +- +-send: +- rxrpc_send_keepalive(peer); +- now = ktime_get_real(); +- goto recalc; +- +-emptied_bucket: +- cursor++; +- if (cursor >= ARRAY_SIZE(rxnet->peer_keepalive)) +- cursor = 0; +- base = ktime_add_ns(base, NSEC_PER_SEC); +- goto next_bucket; ++ list_splice_init(&rxnet->peer_keepalive_new, &collector); ++ ++ stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive); ++ while (base <= now && (s8)(cursor - stop) < 0) { ++ list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask], ++ &collector); ++ base++; ++ cursor++; ++ } ++ ++ base = now; ++ spin_unlock_bh(&rxnet->peer_hash_lock); + +-resched: + rxnet->peer_keepalive_base = base; + rxnet->peer_keepalive_cursor = cursor; +- delay = nsecs_to_jiffies(-diff) + 1; +- timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay); +-out: ++ rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor); ++ ASSERT(list_empty(&collector)); ++ ++ /* Schedule the timer for the next occupied timeslot. 
*/ ++ cursor = rxnet->peer_keepalive_cursor; ++ stop = cursor + RXRPC_KEEPALIVE_TIME - 1; ++ for (; (s8)(cursor - stop) < 0; cursor++) { ++ if (!list_empty(&rxnet->peer_keepalive[cursor & mask])) ++ break; ++ base++; ++ } ++ ++ now = ktime_get_seconds(); ++ delay = base - now; ++ if (delay < 1) ++ delay = 1; ++ delay *= HZ; ++ if (rxnet->live) ++ timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay); ++ + _leave(""); + } +--- a/net/rxrpc/peer_object.c ++++ b/net/rxrpc/peer_object.c +@@ -322,7 +322,7 @@ struct rxrpc_peer *rxrpc_lookup_incoming + if (!peer) { + peer = prealloc; + hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); +- hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive_new); ++ list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); + } + + spin_unlock(&rxnet->peer_hash_lock); +@@ -367,8 +367,8 @@ struct rxrpc_peer *rxrpc_lookup_peer(str + if (!peer) { + hash_add_rcu(rxnet->peer_hash, + &candidate->hash_link, hash_key); +- hlist_add_head(&candidate->keepalive_link, +- &rxnet->peer_keepalive_new); ++ list_add_tail(&candidate->keepalive_link, ++ &rxnet->peer_keepalive_new); + } + + spin_unlock_bh(&rxnet->peer_hash_lock); +@@ -441,7 +441,7 @@ static void __rxrpc_put_peer(struct rxrp + + spin_lock_bh(&rxnet->peer_hash_lock); + hash_del_rcu(&peer->hash_link); +- hlist_del_init(&peer->keepalive_link); ++ list_del_init(&peer->keepalive_link); + spin_unlock_bh(&rxnet->peer_hash_lock); + + kfree_rcu(peer, rcu); +--- a/net/rxrpc/rxkad.c ++++ b/net/rxrpc/rxkad.c +@@ -669,7 +669,7 @@ static int rxkad_issue_challenge(struct + return -EAGAIN; + } + +- conn->params.peer->last_tx_at = ktime_get_real(); ++ conn->params.peer->last_tx_at = ktime_get_seconds(); + _leave(" = 0"); + return 0; + } +@@ -725,7 +725,7 @@ static int rxkad_send_response(struct rx + return -EAGAIN; + } + +- conn->params.peer->last_tx_at = ktime_get_real(); ++ conn->params.peer->last_tx_at = ktime_get_seconds(); + _leave(" = 0"); + return 0; + } diff --git 
a/queue-4.17/series b/queue-4.17/series new file mode 100644 index 00000000000..75f7d82866b --- /dev/null +++ b/queue-4.17/series @@ -0,0 +1,16 @@ +dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch +l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch +llc-use-refcount_inc_not_zero-for-llc_sap_find.patch +net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch +vsock-split-dwork-to-avoid-reinitializations.patch +net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch +vhost-reset-metadata-cache-when-initializing-new-iotlb.patch +mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch +net-aquantia-fix-iff_allmulti-flag-functionality.patch +ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch +mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch +mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch +mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch +net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch +r8169-don-t-use-msi-x-on-rtl8168g.patch +rxrpc-fix-the-keepalive-generator.patch diff --git a/queue-4.17/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch b/queue-4.17/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch new file mode 100644 index 00000000000..f6c3f8434b8 --- /dev/null +++ b/queue-4.17/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch @@ -0,0 +1,40 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Jason Wang +Date: Wed, 8 Aug 2018 11:43:04 +0800 +Subject: vhost: reset metadata cache when initializing new IOTLB + +From: Jason Wang + +[ Upstream commit b13f9c6364373a1b9f71e9846dc4fb199296f926 ] + +We need to reset metadata cache during new IOTLB initialization, +otherwise the stale pointers to previous IOTLB may be still accessed +which will lead to a use after free.
+ +Reported-by: syzbot+c51e6736a1bf614b3272@syzkaller.appspotmail.com +Fixes: f88949138058 ("vhost: introduce O(1) vq metadata cache") +Signed-off-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/vhost.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -1556,9 +1556,12 @@ int vhost_init_device_iotlb(struct vhost + d->iotlb = niotlb; + + for (i = 0; i < d->nvqs; ++i) { +- mutex_lock(&d->vqs[i]->mutex); +- d->vqs[i]->iotlb = niotlb; +- mutex_unlock(&d->vqs[i]->mutex); ++ struct vhost_virtqueue *vq = d->vqs[i]; ++ ++ mutex_lock(&vq->mutex); ++ vq->iotlb = niotlb; ++ __vhost_vq_meta_reset(vq); ++ mutex_unlock(&vq->mutex); + } + + vhost_umem_clean(oiotlb); diff --git a/queue-4.17/vsock-split-dwork-to-avoid-reinitializations.patch b/queue-4.17/vsock-split-dwork-to-avoid-reinitializations.patch new file mode 100644 index 00000000000..3cbb10e0f46 --- /dev/null +++ b/queue-4.17/vsock-split-dwork-to-avoid-reinitializations.patch @@ -0,0 +1,134 @@ +From foo@baz Sat Aug 18 11:41:41 CEST 2018 +From: Cong Wang +Date: Mon, 6 Aug 2018 11:06:02 -0700 +Subject: vsock: split dwork to avoid reinitializations + +From: Cong Wang + +[ Upstream commit 455f05ecd2b219e9a216050796d30c830d9bc393 ] + +syzbot reported that we reinitialize an active delayed +work in vsock_stream_connect(): + + ODEBUG: init active (active state 0) object type: timer_list hint: + delayed_work_timer_fn+0x0/0x90 kernel/workqueue.c:1414 + WARNING: CPU: 1 PID: 11518 at lib/debugobjects.c:329 + debug_print_object+0x16a/0x210 lib/debugobjects.c:326 + +The pattern is apparently wrong, we should only initialize +the delayed work once and could repeatedly schedule it. So we +have to move out the initializations to allocation side. +And to avoid confusion, we can split the shared dwork +into two, instead of re-using the same one.
+ +Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") +Reported-by: +Cc: Andy king +Cc: Stefan Hajnoczi +Cc: Jorgen Hansen +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/af_vsock.h | 4 ++-- + net/vmw_vsock/af_vsock.c | 15 ++++++++------- + net/vmw_vsock/vmci_transport.c | 3 +-- + 3 files changed, 11 insertions(+), 11 deletions(-) + +--- a/include/net/af_vsock.h ++++ b/include/net/af_vsock.h +@@ -64,7 +64,8 @@ struct vsock_sock { + struct list_head pending_links; + struct list_head accept_queue; + bool rejected; +- struct delayed_work dwork; ++ struct delayed_work connect_work; ++ struct delayed_work pending_work; + struct delayed_work close_work; + bool close_work_scheduled; + u32 peer_shutdown; +@@ -77,7 +78,6 @@ struct vsock_sock { + + s64 vsock_stream_has_data(struct vsock_sock *vsk); + s64 vsock_stream_has_space(struct vsock_sock *vsk); +-void vsock_pending_work(struct work_struct *work); + struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -451,14 +451,14 @@ static int vsock_send_shutdown(struct so + return transport->shutdown(vsock_sk(sk), mode); + } + +-void vsock_pending_work(struct work_struct *work) ++static void vsock_pending_work(struct work_struct *work) + { + struct sock *sk; + struct sock *listener; + struct vsock_sock *vsk; + bool cleanup; + +- vsk = container_of(work, struct vsock_sock, dwork.work); ++ vsk = container_of(work, struct vsock_sock, pending_work.work); + sk = sk_vsock(vsk); + listener = vsk->listener; + cleanup = true; +@@ -498,7 +498,6 @@ out: + sock_put(sk); + sock_put(listener); + } +-EXPORT_SYMBOL_GPL(vsock_pending_work); + + /**** SOCKET OPERATIONS ****/ + +@@ -597,6 +596,8 @@ static int __vsock_bind(struct sock *sk, + return retval; + } + ++static void vsock_connect_timeout(struct work_struct *work); ++ + struct sock *__vsock_create(struct net *net, + struct 
socket *sock, + struct sock *parent, +@@ -638,6 +639,8 @@ struct sock *__vsock_create(struct net * + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + vsk->peer_shutdown = 0; ++ INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout); ++ INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work); + + psk = parent ? vsock_sk(parent) : NULL; + if (parent) { +@@ -1117,7 +1120,7 @@ static void vsock_connect_timeout(struct + struct vsock_sock *vsk; + int cancel = 0; + +- vsk = container_of(work, struct vsock_sock, dwork.work); ++ vsk = container_of(work, struct vsock_sock, connect_work.work); + sk = sk_vsock(vsk); + + lock_sock(sk); +@@ -1221,9 +1224,7 @@ static int vsock_stream_connect(struct s + * timeout fires. + */ + sock_hold(sk); +- INIT_DELAYED_WORK(&vsk->dwork, +- vsock_connect_timeout); +- schedule_delayed_work(&vsk->dwork, timeout); ++ schedule_delayed_work(&vsk->connect_work, timeout); + + /* Skip ahead to preserve error code set above. */ + goto out_wait; +--- a/net/vmw_vsock/vmci_transport.c ++++ b/net/vmw_vsock/vmci_transport.c +@@ -1094,8 +1094,7 @@ static int vmci_transport_recv_listen(st + vpending->listener = sk; + sock_hold(sk); + sock_hold(pending); +- INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); +- schedule_delayed_work(&vpending->dwork, HZ); ++ schedule_delayed_work(&vpending->pending_work, HZ); + + out: + return err; -- 2.47.3