]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.17-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 18 Aug 2018 09:43:44 +0000 (11:43 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 18 Aug 2018 09:43:44 +0000 (11:43 +0200)
added patches:
dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch
ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch
l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch
llc-use-refcount_inc_not_zero-for-llc_sap_find.patch
mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch
mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch
mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch
mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch
net-aquantia-fix-iff_allmulti-flag-functionality.patch
net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch
net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch
net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch
r8169-don-t-use-msi-x-on-rtl8168g.patch
rxrpc-fix-the-keepalive-generator.patch
vhost-reset-metadata-cache-when-initializing-new-iotlb.patch
vsock-split-dwork-to-avoid-reinitializations.patch

17 files changed:
queue-4.17/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch [new file with mode: 0644]
queue-4.17/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch [new file with mode: 0644]
queue-4.17/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch [new file with mode: 0644]
queue-4.17/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch [new file with mode: 0644]
queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch [new file with mode: 0644]
queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch [new file with mode: 0644]
queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch [new file with mode: 0644]
queue-4.17/mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch [new file with mode: 0644]
queue-4.17/net-aquantia-fix-iff_allmulti-flag-functionality.patch [new file with mode: 0644]
queue-4.17/net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch [new file with mode: 0644]
queue-4.17/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch [new file with mode: 0644]
queue-4.17/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch [new file with mode: 0644]
queue-4.17/r8169-don-t-use-msi-x-on-rtl8168g.patch [new file with mode: 0644]
queue-4.17/rxrpc-fix-the-keepalive-generator.patch [new file with mode: 0644]
queue-4.17/series [new file with mode: 0644]
queue-4.17/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch [new file with mode: 0644]
queue-4.17/vsock-split-dwork-to-avoid-reinitializations.patch [new file with mode: 0644]

diff --git a/queue-4.17/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch b/queue-4.17/dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch
new file mode 100644 (file)
index 0000000..3bc55ee
--- /dev/null
@@ -0,0 +1,75 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+Date: Tue, 7 Aug 2018 20:03:57 +0300
+Subject: dccp: fix undefined behavior with 'cwnd' shift in ccid2_cwnd_restart()
+
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+
+[ Upstream commit 61ef4b07fcdc30535889990cf4229766502561cf ]
+
+The shift of 'cwnd' with '(now - hc->tx_lsndtime) / hc->tx_rto' value
+can lead to undefined behavior [1].
+
+In order to fix this use a gradual shift of the window with a 'while'
+loop, similar to what tcp_cwnd_restart() is doing.
+
+When comparing delta and RTO there is a minor difference between TCP
+and DCCP: the latter also invokes ccid2_cwnd_restart() and reduces
+'cwnd' if delta equals RTO. That case is preserved in this change.
+
+[1]:
+[40850.963623] UBSAN: Undefined behaviour in net/dccp/ccids/ccid2.c:237:7
+[40851.043858] shift exponent 67 is too large for 32-bit type 'unsigned int'
+[40851.127163] CPU: 3 PID: 15940 Comm: netstress Tainted: G        W   E     4.18.0-rc7.x86_64 #1
+...
+[40851.377176] Call Trace:
+[40851.408503]  dump_stack+0xf1/0x17b
+[40851.451331]  ? show_regs_print_info+0x5/0x5
+[40851.503555]  ubsan_epilogue+0x9/0x7c
+[40851.548363]  __ubsan_handle_shift_out_of_bounds+0x25b/0x2b4
+[40851.617109]  ? __ubsan_handle_load_invalid_value+0x18f/0x18f
+[40851.686796]  ? xfrm4_output_finish+0x80/0x80
+[40851.739827]  ? lock_downgrade+0x6d0/0x6d0
+[40851.789744]  ? xfrm4_prepare_output+0x160/0x160
+[40851.845912]  ? ip_queue_xmit+0x810/0x1db0
+[40851.895845]  ? ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp]
+[40851.963530]  ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp]
+[40852.029063]  dccp_xmit_packet+0x1d3/0x720 [dccp]
+[40852.086254]  dccp_write_xmit+0x116/0x1d0 [dccp]
+[40852.142412]  dccp_sendmsg+0x428/0xb20 [dccp]
+[40852.195454]  ? inet_dccp_listen+0x200/0x200 [dccp]
+[40852.254833]  ? sched_clock+0x5/0x10
+[40852.298508]  ? sched_clock+0x5/0x10
+[40852.342194]  ? inet_create+0xdf0/0xdf0
+[40852.388988]  sock_sendmsg+0xd9/0x160
+...
+
+Fixes: 113ced1f52e5 ("dccp ccid-2: Perform congestion-window validation")
+Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ccids/ccid2.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/dccp/ccids/ccid2.c
++++ b/net/dccp/ccids/ccid2.c
+@@ -228,14 +228,16 @@ static void ccid2_cwnd_restart(struct so
+       struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+       u32 cwnd = hc->tx_cwnd, restart_cwnd,
+           iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
++      s32 delta = now - hc->tx_lsndtime;
+       hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
+       /* don't reduce cwnd below the initial window (IW) */
+       restart_cwnd = min(cwnd, iwnd);
+-      cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto;
+-      hc->tx_cwnd = max(cwnd, restart_cwnd);
++      while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd)
++              cwnd >>= 1;
++      hc->tx_cwnd = max(cwnd, restart_cwnd);
+       hc->tx_cwnd_stamp = now;
+       hc->tx_cwnd_used  = 0;
diff --git a/queue-4.17/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch b/queue-4.17/ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch
new file mode 100644 (file)
index 0000000..3859451
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sun, 5 Aug 2018 22:46:07 +0800
+Subject: ip6_tunnel: use the right value for ipv4 min mtu check in ip6_tnl_xmit
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 82a40777de12728dedf4075453b694f0d1baee80 ]
+
+According to RFC791, 68 bytes is the minimum size of IPv4 datagram every
+device must be able to forward without further fragmentation while 576
+bytes is the minimum size of IPv4 datagram every device has to be able
+to receive, so in ip6_tnl_xmit(), 68(IPV4_MIN_MTU) should be the right
+value for the ipv4 min mtu check in ip6_tnl_xmit.
+
+While at it, change to use max() instead of if statement.
+
+Fixes: c9fefa08190f ("ip6_tunnel: get the min mtu properly in ip6_tnl_xmit")
+Reported-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_tunnel.c |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1133,12 +1133,8 @@ route_lookup:
+               max_headroom += 8;
+               mtu -= 8;
+       }
+-      if (skb->protocol == htons(ETH_P_IPV6)) {
+-              if (mtu < IPV6_MIN_MTU)
+-                      mtu = IPV6_MIN_MTU;
+-      } else if (mtu < 576) {
+-              mtu = 576;
+-      }
++      mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
++                     IPV6_MIN_MTU : IPV4_MIN_MTU);
+       skb_dst_update_pmtu(skb, mtu);
+       if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
diff --git a/queue-4.17/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch b/queue-4.17/l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch
new file mode 100644 (file)
index 0000000..7f2fd90
--- /dev/null
@@ -0,0 +1,95 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Wei Wang <weiwan@google.com>
+Date: Fri, 10 Aug 2018 11:14:56 -0700
+Subject: l2tp: use sk_dst_check() to avoid race on sk->sk_dst_cache
+
+From: Wei Wang <weiwan@google.com>
+
+[ Upstream commit 6d37fa49da1e8db8fb1995be22ac837ca41ac8a8 ]
+
+In l2tp code, if it is a L2TP_UDP_ENCAP tunnel, tunnel->sk points to a
+UDP socket. User could call sendmsg() on both this tunnel and the UDP
+socket itself concurrently. As l2tp_xmit_skb() holds the socket lock and calls
+__sk_dst_check() to refresh sk->sk_dst_cache, while udpv6_sendmsg() is
+lockless and calls sk_dst_check() to refresh sk->sk_dst_cache, there
+could be a race that causes the dst cache to be freed multiple times.
+So we fix l2tp side code to always call sk_dst_check() to guarantee
+xchg() is called when refreshing sk->sk_dst_cache to avoid race
+conditions.
+
+Syzkaller reported stack trace:
+BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline]
+BUG: KASAN: use-after-free in atomic_fetch_add_unless include/linux/atomic.h:575 [inline]
+BUG: KASAN: use-after-free in atomic_add_unless include/linux/atomic.h:597 [inline]
+BUG: KASAN: use-after-free in dst_hold_safe include/net/dst.h:308 [inline]
+BUG: KASAN: use-after-free in ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029
+Read of size 4 at addr ffff8801aea9a880 by task syz-executor129/4829
+
+CPU: 0 PID: 4829 Comm: syz-executor129 Not tainted 4.18.0-rc7-next-20180802+ #30
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
+ print_address_description+0x6c/0x20b mm/kasan/report.c:256
+ kasan_report_error mm/kasan/report.c:354 [inline]
+ kasan_report.cold.7+0x242/0x30d mm/kasan/report.c:412
+ check_memory_region_inline mm/kasan/kasan.c:260 [inline]
+ check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267
+ kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272
+ atomic_read include/asm-generic/atomic-instrumented.h:21 [inline]
+ atomic_fetch_add_unless include/linux/atomic.h:575 [inline]
+ atomic_add_unless include/linux/atomic.h:597 [inline]
+ dst_hold_safe include/net/dst.h:308 [inline]
+ ip6_hold_safe+0xe6/0x670 net/ipv6/route.c:1029
+ rt6_get_pcpu_route net/ipv6/route.c:1249 [inline]
+ ip6_pol_route+0x354/0xd20 net/ipv6/route.c:1922
+ ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2098
+ fib6_rule_lookup+0x283/0x890 net/ipv6/fib6_rules.c:122
+ ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2126
+ ip6_dst_lookup_tail+0x1278/0x1da0 net/ipv6/ip6_output.c:978
+ ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079
+ ip6_sk_dst_lookup_flow+0x5ed/0xc50 net/ipv6/ip6_output.c:1117
+ udpv6_sendmsg+0x2163/0x36b0 net/ipv6/udp.c:1354
+ inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
+ sock_sendmsg_nosec net/socket.c:622 [inline]
+ sock_sendmsg+0xd5/0x120 net/socket.c:632
+ ___sys_sendmsg+0x51d/0x930 net/socket.c:2115
+ __sys_sendmmsg+0x240/0x6f0 net/socket.c:2210
+ __do_sys_sendmmsg net/socket.c:2239 [inline]
+ __se_sys_sendmmsg net/socket.c:2236 [inline]
+ __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2236
+ do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x446a29
+Code: e8 ac b8 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007f4de5532db8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133
+RAX: ffffffffffffffda RBX: 00000000006dcc38 RCX: 0000000000446a29
+RDX: 00000000000000b8 RSI: 0000000020001b00 RDI: 0000000000000003
+RBP: 00000000006dcc30 R08: 00007f4de5533700 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dcc3c
+R13: 00007ffe2b830fdf R14: 00007f4de55339c0 R15: 0000000000000001
+
+Fixes: 71b1391a4128 ("l2tp: ensure sk->dst is still valid")
+Reported-by: syzbot+05f840f3b04f211bad55@syzkaller.appspotmail.com
+Signed-off-by: Wei Wang <weiwan@google.com>
+Signed-off-by: Martin KaFai Lau <kafai@fb.com>
+Cc: Guillaume Nault <g.nault@alphalink.fr>
+Cc: David Ahern <dsahern@gmail.com>
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -1110,7 +1110,7 @@ int l2tp_xmit_skb(struct l2tp_session *s
+       /* Get routing info from the tunnel socket */
+       skb_dst_drop(skb);
+-      skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0)));
++      skb_dst_set(skb, sk_dst_check(sk, 0));
+       inet = inet_sk(sk);
+       fl = &inet->cork.fl;
diff --git a/queue-4.17/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch b/queue-4.17/llc-use-refcount_inc_not_zero-for-llc_sap_find.patch
new file mode 100644 (file)
index 0000000..81c750a
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Tue, 7 Aug 2018 12:41:38 -0700
+Subject: llc: use refcount_inc_not_zero() for llc_sap_find()
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 0dcb82254d65f72333aa50ad626d1e9665ad093b ]
+
+llc_sap_put() decreases the refcnt before deleting sap
+from the global list. Therefore, there is a chance
+llc_sap_find() could find a sap with zero refcnt
+in this global list.
+
+Close this race condition by checking if refcnt is zero
+or not in llc_sap_find(), if it is zero then it is being
+removed so we can just treat it as gone.
+
+Reported-by: <syzbot+278893f3f7803871f7ce@syzkaller.appspotmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/llc.h  |    5 +++++
+ net/llc/llc_core.c |    4 ++--
+ 2 files changed, 7 insertions(+), 2 deletions(-)
+
+--- a/include/net/llc.h
++++ b/include/net/llc.h
+@@ -116,6 +116,11 @@ static inline void llc_sap_hold(struct l
+       refcount_inc(&sap->refcnt);
+ }
++static inline bool llc_sap_hold_safe(struct llc_sap *sap)
++{
++      return refcount_inc_not_zero(&sap->refcnt);
++}
++
+ void llc_sap_close(struct llc_sap *sap);
+ static inline void llc_sap_put(struct llc_sap *sap)
+--- a/net/llc/llc_core.c
++++ b/net/llc/llc_core.c
+@@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned ch
+       rcu_read_lock_bh();
+       sap = __llc_sap_find(sap_value);
+-      if (sap)
+-              llc_sap_hold(sap);
++      if (!sap || !llc_sap_hold_safe(sap))
++              sap = NULL;
+       rcu_read_unlock_bh();
+       return sap;
+ }
diff --git a/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch b/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch
new file mode 100644 (file)
index 0000000..85d27a3
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Nir Dotan <nird@mellanox.com>
+Date: Fri, 3 Aug 2018 15:57:43 +0300
+Subject: mlxsw: core_acl_flex_actions: Remove redundant counter destruction
+
+From: Nir Dotan <nird@mellanox.com>
+
+[ Upstream commit 7cc6169493990dec488eda0a3f6612729ca25e81 ]
+
+Each tc flower rule uses a hidden count action. As counter resource may
+not be available due to limited HW resources, update _counter_create()
+and _counter_destroy() pair to follow previously introduced symmetric
+error condition handling, add a call to mlxsw_afa_resource_del() as part
+of the counter resource destruction.
+
+Fixes: c18c1e186ba8 ("mlxsw: core: Make counter index allocated inside the action append")
+Signed-off-by: Nir Dotan <nird@mellanox.com>
+Reviewed-by: Petr Machata <petrm@mellanox.com>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+@@ -584,6 +584,7 @@ static void
+ mlxsw_afa_counter_destroy(struct mlxsw_afa_block *block,
+                         struct mlxsw_afa_counter *counter)
+ {
++      mlxsw_afa_resource_del(&counter->resource);
+       block->afa->ops->counter_index_put(block->afa->ops_priv,
+                                          counter->counter_index);
+       kfree(counter);
diff --git a/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch b/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch
new file mode 100644 (file)
index 0000000..24b74f3
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Nir Dotan <nird@mellanox.com>
+Date: Fri, 3 Aug 2018 15:57:44 +0300
+Subject: mlxsw: core_acl_flex_actions: Remove redundant mirror resource destruction
+
+From: Nir Dotan <nird@mellanox.com>
+
+[ Upstream commit caebd1b389708bf3d0465be829480fc706a68720 ]
+
+In the previous patch mlxsw_afa_resource_del() was added to avoid a duplicate
+resource destruction scenario.
+For mirror actions, such duplicate destruction leads to a crash as in:
+
+ # tc qdisc add dev swp49 ingress
+ # tc filter add dev swp49 parent ffff: \
+   protocol ip chain 100 pref 10 \
+   flower skip_sw dst_ip 192.168.101.1 action drop
+ # tc filter add dev swp49 parent ffff: \
+   protocol ip pref 10 \
+   flower skip_sw dst_ip 192.168.101.1 action goto chain 100 \
+   action mirred egress mirror dev swp4
+
+Therefore add a call to mlxsw_afa_resource_del() in
+mlxsw_afa_mirror_destroy() in order to clear that resource
+from rule's resources.
+
+Fixes: d0d13c1858a1 ("mlxsw: spectrum_acl: Add support for mirror action")
+Signed-off-by: Nir Dotan <nird@mellanox.com>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+@@ -862,6 +862,7 @@ static void
+ mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
+                        struct mlxsw_afa_mirror *mirror)
+ {
++      mlxsw_afa_resource_del(&mirror->resource);
+       block->afa->ops->mirror_del(block->afa->ops_priv,
+                                   mirror->local_in_port,
+                                   mirror->span_id,
diff --git a/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch b/queue-4.17/mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch
new file mode 100644 (file)
index 0000000..0edba8d
--- /dev/null
@@ -0,0 +1,72 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Nir Dotan <nird@mellanox.com>
+Date: Fri, 3 Aug 2018 15:57:42 +0300
+Subject: mlxsw: core_acl_flex_actions: Remove redundant resource destruction
+
+From: Nir Dotan <nird@mellanox.com>
+
+[ Upstream commit dda0a3a3fb92451d4a922e56365ee1f73c8a9586 ]
+
+Some ACL actions require the allocation of a separate resource
+prior to applying the action itself. When facing an error condition
+during the setup phase of the action, resource should be destroyed.
+For such actions the destruction was done twice, which is dangerous
+and can lead to a crash.
+The destruction took place first upon error on action setup phase
+and then as the rule was destroyed.
+
+The following sequence generated a crash:
+
+ # tc qdisc add dev swp49 ingress
+ # tc filter add dev swp49 parent ffff: \
+   protocol ip chain 100 pref 10 \
+   flower skip_sw dst_ip 192.168.101.1 action drop
+ # tc filter add dev swp49 parent ffff: \
+   protocol ip pref 10 \
+   flower skip_sw dst_ip 192.168.101.1 action goto chain 100 \
+   action mirred egress mirror dev swp4
+
+Therefore add mlxsw_afa_resource_del() as a complement of
+mlxsw_afa_resource_add() to add symmetry to resource_list membership
+handling. Call this from mlxsw_afa_fwd_entry_ref_destroy() to make the
+_fwd_entry_ref_create() and _fwd_entry_ref_destroy() pair of calls a
+NOP.
+
+Fixes: 140ce421217e ("mlxsw: core: Convert fwd_entry_ref list to be generic per-block resource list")
+Signed-off-by: Nir Dotan <nird@mellanox.com>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+@@ -327,12 +327,16 @@ static void mlxsw_afa_resource_add(struc
+       list_add(&resource->list, &block->resource_list);
+ }
++static void mlxsw_afa_resource_del(struct mlxsw_afa_resource *resource)
++{
++      list_del(&resource->list);
++}
++
+ static void mlxsw_afa_resources_destroy(struct mlxsw_afa_block *block)
+ {
+       struct mlxsw_afa_resource *resource, *tmp;
+       list_for_each_entry_safe(resource, tmp, &block->resource_list, list) {
+-              list_del(&resource->list);
+               resource->destructor(block, resource);
+       }
+ }
+@@ -530,6 +534,7 @@ static void
+ mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block,
+                               struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref)
+ {
++      mlxsw_afa_resource_del(&fwd_entry_ref->resource);
+       mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry);
+       kfree(fwd_entry_ref);
+ }
diff --git a/queue-4.17/mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch b/queue-4.17/mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch
new file mode 100644 (file)
index 0000000..1bdb3bb
--- /dev/null
@@ -0,0 +1,163 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Nir Dotan <nird@mellanox.com>
+Date: Fri, 3 Aug 2018 15:57:41 +0300
+Subject: mlxsw: core_acl_flex_actions: Return error for conflicting actions
+
+From: Nir Dotan <nird@mellanox.com>
+
+[ Upstream commit 3757b255bf20ae3c941abae7624ff215bfd9ef05 ]
+
+Spectrum switch ACL action set is built in groups of three actions
+which may point to additional actions. A group holds a single record
+which can be set as goto record for pointing at a following group
+or can be set to mark the termination of the lookup. This is perfectly
+adequate for handling a series of actions to be executed on a packet.
+While the SW model allows configuration of conflicting actions
+where it is clear that some actions will never execute, the mlxsw
+driver must block such configurations as it creates a conflict
+over the single terminate/goto record value.
+
+For a conflicting actions configuration such as:
+
+ # tc filter add dev swp49 parent ffff: \
+   protocol ip pref 10 \
+   flower skip_sw dst_ip 192.168.101.1 \
+   action goto chain 100 \
+   action mirred egress mirror dev swp4
+
+Where it is clear that the last action will never execute, the
+mlxsw driver was issuing a warning instead of returning an error.
+Therefore replace that warning with an error for this specific
+case.
+
+Fixes: 4cda7d8d7098 ("mlxsw: core: Introduce flexible actions support")
+Signed-off-by: Nir Dotan <nird@mellanox.com>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c |   42 ++++++------
+ 1 file changed, 21 insertions(+), 21 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+@@ -626,8 +626,8 @@ static char *mlxsw_afa_block_append_acti
+       char *oneact;
+       char *actions;
+-      if (WARN_ON(block->finished))
+-              return NULL;
++      if (block->finished)
++              return ERR_PTR(-EINVAL);
+       if (block->cur_act_index + action_size >
+           block->afa->max_acts_per_set) {
+               struct mlxsw_afa_set *set;
+@@ -637,7 +637,7 @@ static char *mlxsw_afa_block_append_acti
+                */
+               set = mlxsw_afa_set_create(false);
+               if (!set)
+-                      return NULL;
++                      return ERR_PTR(-ENOBUFS);
+               set->prev = block->cur_set;
+               block->cur_act_index = 0;
+               block->cur_set->next = set;
+@@ -724,8 +724,8 @@ int mlxsw_afa_block_append_vlan_modify(s
+                                                 MLXSW_AFA_VLAN_CODE,
+                                                 MLXSW_AFA_VLAN_SIZE);
+-      if (!act)
+-              return -ENOBUFS;
++      if (IS_ERR(act))
++              return PTR_ERR(act);
+       mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP,
+                           MLXSW_AFA_VLAN_CMD_SET_OUTER, vid,
+                           MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp,
+@@ -806,8 +806,8 @@ int mlxsw_afa_block_append_drop(struct m
+                                                 MLXSW_AFA_TRAPDISC_CODE,
+                                                 MLXSW_AFA_TRAPDISC_SIZE);
+-      if (!act)
+-              return -ENOBUFS;
++      if (IS_ERR(act))
++              return PTR_ERR(act);
+       mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP,
+                               MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 0);
+       return 0;
+@@ -820,8 +820,8 @@ int mlxsw_afa_block_append_trap(struct m
+                                                 MLXSW_AFA_TRAPDISC_CODE,
+                                                 MLXSW_AFA_TRAPDISC_SIZE);
+-      if (!act)
+-              return -ENOBUFS;
++      if (IS_ERR(act))
++              return PTR_ERR(act);
+       mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP,
+                               MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD,
+                               trap_id);
+@@ -836,8 +836,8 @@ int mlxsw_afa_block_append_trap_and_forw
+                                                 MLXSW_AFA_TRAPDISC_CODE,
+                                                 MLXSW_AFA_TRAPDISC_SIZE);
+-      if (!act)
+-              return -ENOBUFS;
++      if (IS_ERR(act))
++              return PTR_ERR(act);
+       mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP,
+                               MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD,
+                               trap_id);
+@@ -908,8 +908,8 @@ mlxsw_afa_block_append_allocated_mirror(
+       char *act = mlxsw_afa_block_append_action(block,
+                                                 MLXSW_AFA_TRAPDISC_CODE,
+                                                 MLXSW_AFA_TRAPDISC_SIZE);
+-      if (!act)
+-              return -ENOBUFS;
++      if (IS_ERR(act))
++              return PTR_ERR(act);
+       mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP,
+                               MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 0);
+       mlxsw_afa_trapdisc_mirror_pack(act, true, mirror_agent);
+@@ -996,8 +996,8 @@ int mlxsw_afa_block_append_fwd(struct ml
+       act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE,
+                                           MLXSW_AFA_FORWARD_SIZE);
+-      if (!act) {
+-              err = -ENOBUFS;
++      if (IS_ERR(act)) {
++              err = PTR_ERR(act);
+               goto err_append_action;
+       }
+       mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_PBS,
+@@ -1052,8 +1052,8 @@ int mlxsw_afa_block_append_allocated_cou
+ {
+       char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_POLCNT_CODE,
+                                                 MLXSW_AFA_POLCNT_SIZE);
+-      if (!act)
+-              return -ENOBUFS;
++      if (IS_ERR(act))
++              return PTR_ERR(act);
+       mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES,
+                             counter_index);
+       return 0;
+@@ -1123,8 +1123,8 @@ int mlxsw_afa_block_append_fid_set(struc
+       char *act = mlxsw_afa_block_append_action(block,
+                                                 MLXSW_AFA_VIRFWD_CODE,
+                                                 MLXSW_AFA_VIRFWD_SIZE);
+-      if (!act)
+-              return -ENOBUFS;
++      if (IS_ERR(act))
++              return PTR_ERR(act);
+       mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid);
+       return 0;
+ }
+@@ -1193,8 +1193,8 @@ int mlxsw_afa_block_append_mcrouter(stru
+       char *act = mlxsw_afa_block_append_action(block,
+                                                 MLXSW_AFA_MCROUTER_CODE,
+                                                 MLXSW_AFA_MCROUTER_SIZE);
+-      if (!act)
+-              return -ENOBUFS;
++      if (IS_ERR(act))
++              return PTR_ERR(act);
+       mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP,
+                               expected_irif, min_mtu, rmid_valid, kvdl_index);
+       return 0;
diff --git a/queue-4.17/net-aquantia-fix-iff_allmulti-flag-functionality.patch b/queue-4.17/net-aquantia-fix-iff_allmulti-flag-functionality.patch
new file mode 100644 (file)
index 0000000..5e34fcd
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
+Date: Wed, 8 Aug 2018 14:06:32 +0300
+Subject: net: aquantia: Fix IFF_ALLMULTI flag functionality
+
+From: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
+
+[ Upstream commit 11ba961c916127651e12af6cad3891f8aeb25aa9 ]
+
+It was noticed that the NIC always passes all multicast traffic to the host
+regardless of the IFF_ALLMULTI flag on the interface.
+The rule in the MC Filter Table in the NIC that is configured to accept any
+multicast packets is turned on if the IFF_MULTICAST flag is set on the
+interface. This leads to passing all multicast traffic to the host.
+This fix changes the condition to turn on that rule by checking
+IFF_ALLMULTI flag as it should.
+
+Fixes: b21f502f84be ("net:ethernet:aquantia: Fix for multicast filter handling.")
+Signed-off-by: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
++++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+@@ -762,7 +762,7 @@ static int hw_atl_b0_hw_packet_filter_se
+       hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC));
+       hw_atl_rpfl2multicast_flr_en_set(self,
+-                                       IS_FILTER_ENABLED(IFF_MULTICAST), 0);
++                                       IS_FILTER_ENABLED(IFF_ALLMULTI), 0);
+       hw_atl_rpfl2_accept_all_mc_packets_set(self,
+                                              IS_FILTER_ENABLED(IFF_ALLMULTI));
diff --git a/queue-4.17/net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch b/queue-4.17/net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch
new file mode 100644 (file)
index 0000000..86c8fab
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Wed, 8 Aug 2018 15:48:07 -0700
+Subject: net/mlx5e: Properly check if hairpin is possible between two functions
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+[ Upstream commit 816f670623692b5da2787f278cbfdb331ed29b8a ]
+
+The current check relies on function BDF addresses and can give
+the wrong result, e.g. when two VFs are assigned to a VM and the PCI
+v-address is set by the hypervisor.
+
+Fixes: 5c65c564c962 ('net/mlx5e: Support offloading TC NIC hairpin flows')
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reported-by: Alaa Hleihel <alaa@mellanox.com>
+Tested-by: Alaa Hleihel <alaa@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1907,15 +1907,15 @@ static bool actions_match_supported(stru
+ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
+ {
+       struct mlx5_core_dev *fmdev, *pmdev;
+-      u16 func_id, peer_id;
++      u64 fsystem_guid, psystem_guid;
+       fmdev = priv->mdev;
+       pmdev = peer_priv->mdev;
+-      func_id = (u16)((fmdev->pdev->bus->number << 8) | PCI_SLOT(fmdev->pdev->devfn));
+-      peer_id = (u16)((pmdev->pdev->bus->number << 8) | PCI_SLOT(pmdev->pdev->devfn));
++      mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid);
++      mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid);
+-      return (func_id == peer_id);
++      return (fsystem_guid == psystem_guid);
+ }
+ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
diff --git a/queue-4.17/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch b/queue-4.17/net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch
new file mode 100644 (file)
index 0000000..f76d32f
--- /dev/null
@@ -0,0 +1,84 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Mon, 13 Aug 2018 18:44:04 +0800
+Subject: net_sched: Fix missing res info when create new tc_index filter
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 008369dcc5f7bfba526c98054f8525322acf0ea3 ]
+
+Li Shuang reported the following warn:
+
+[  733.484610] WARNING: CPU: 6 PID: 21123 at net/sched/sch_cbq.c:1418 cbq_destroy_class+0x5d/0x70 [sch_cbq]
+[  733.495190] Modules linked in: sch_cbq cls_tcindex sch_dsmark rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat l
+[  733.574155]  syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm igb ixgbe ahci libahci i2c_algo_bit libata i40e i2c_core dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod
+[  733.592500] CPU: 6 PID: 21123 Comm: tc Not tainted 4.18.0-rc8.latest+ #131
+[  733.600169] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016
+[  733.608518] RIP: 0010:cbq_destroy_class+0x5d/0x70 [sch_cbq]
+[  733.614734] Code: e7 d9 d2 48 8b 7b 48 e8 61 05 da d2 48 8d bb f8 00 00 00 e8 75 ae d5 d2 48 39 eb 74 0a 48 89 df 5b 5d e9 16 6c 94 d2 5b 5d c3 <0f> 0b eb b6 0f 1f 44 00 00 66 2e 0f 1f 84
+[  733.635798] RSP: 0018:ffffbfbb066bb9d8 EFLAGS: 00010202
+[  733.641627] RAX: 0000000000000001 RBX: ffff9cdd17392800 RCX: 000000008010000f
+[  733.649588] RDX: ffff9cdd1df547e0 RSI: ffff9cdd17392800 RDI: ffff9cdd0f84c800
+[  733.657547] RBP: ffff9cdd0f84c800 R08: 0000000000000001 R09: 0000000000000000
+[  733.665508] R10: ffff9cdd0f84d000 R11: 0000000000000001 R12: 0000000000000001
+[  733.673469] R13: 0000000000000000 R14: 0000000000000001 R15: ffff9cdd17392200
+[  733.681430] FS:  00007f911890a740(0000) GS:ffff9cdd1f8c0000(0000) knlGS:0000000000000000
+[  733.690456] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  733.696864] CR2: 0000000000b5544c CR3: 0000000859374002 CR4: 00000000001606e0
+[  733.704826] Call Trace:
+[  733.707554]  cbq_destroy+0xa1/0xd0 [sch_cbq]
+[  733.712318]  qdisc_destroy+0x62/0x130
+[  733.716401]  dsmark_destroy+0x2a/0x70 [sch_dsmark]
+[  733.721745]  qdisc_destroy+0x62/0x130
+[  733.725829]  qdisc_graft+0x3ba/0x470
+[  733.729817]  tc_get_qdisc+0x2a6/0x2c0
+[  733.733901]  ? cred_has_capability+0x7d/0x130
+[  733.738761]  rtnetlink_rcv_msg+0x263/0x2d0
+[  733.743330]  ? rtnl_calcit.isra.30+0x110/0x110
+[  733.748287]  netlink_rcv_skb+0x4d/0x130
+[  733.752576]  netlink_unicast+0x1a3/0x250
+[  733.756949]  netlink_sendmsg+0x2ae/0x3a0
+[  733.761324]  sock_sendmsg+0x36/0x40
+[  733.765213]  ___sys_sendmsg+0x26f/0x2d0
+[  733.769493]  ? handle_pte_fault+0x586/0xdf0
+[  733.774158]  ? __handle_mm_fault+0x389/0x500
+[  733.778919]  ? __sys_sendmsg+0x5e/0xa0
+[  733.783099]  __sys_sendmsg+0x5e/0xa0
+[  733.787087]  do_syscall_64+0x5b/0x180
+[  733.791171]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  733.796805] RIP: 0033:0x7f9117f23f10
+[  733.800791] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8
+[  733.821873] RSP: 002b:00007ffe96818398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+[  733.830319] RAX: ffffffffffffffda RBX: 000000005b71244c RCX: 00007f9117f23f10
+[  733.838280] RDX: 0000000000000000 RSI: 00007ffe968183e0 RDI: 0000000000000003
+[  733.846241] RBP: 00007ffe968183e0 R08: 000000000000ffff R09: 0000000000000003
+[  733.854202] R10: 00007ffe96817e20 R11: 0000000000000246 R12: 0000000000000000
+[  733.862161] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000
+[  733.870121] ---[ end trace 28edd4aad712ddca ]---
+
+This is because we didn't update f->result.res when creating a new filter. Then in
+tcindex_delete() -> tcf_unbind_filter(), we fail to find the res
+and unbind the filter, which triggers the WARN_ON() in cbq_destroy_class().
+
+Fix it by updating f->result.res when creating a new filter.
+
+Fixes: 6e0565697a106 ("net_sched: fix another crash in cls_tcindex")
+Reported-by: Li Shuang <shuali@redhat.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_tcindex.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sched/cls_tcindex.c
++++ b/net/sched/cls_tcindex.c
+@@ -486,6 +486,7 @@ tcindex_set_parms(struct net *net, struc
+               struct tcindex_filter *nfp;
+               struct tcindex_filter __rcu **fp;
++              f->result.res = r->res;
+               tcf_exts_change(&f->result.exts, &r->exts);
+               fp = cp->h + (handle % cp->hash);
diff --git a/queue-4.17/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch b/queue-4.17/net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch
new file mode 100644 (file)
index 0000000..3bbe053
--- /dev/null
@@ -0,0 +1,120 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Mon, 13 Aug 2018 18:44:03 +0800
+Subject: net_sched: fix NULL pointer dereference when delete tcindex filter
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 2df8bee5654bb2b7312662ca6810d4dc16b0b67f ]
+
+Li Shuang reported the following crash:
+
+[   71.267724] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
+[   71.276456] PGD 800000085d9bd067 P4D 800000085d9bd067 PUD 859a0b067 PMD 0
+[   71.284127] Oops: 0000 [#1] SMP PTI
+[   71.288015] CPU: 12 PID: 2386 Comm: tc Not tainted 4.18.0-rc8.latest+ #131
+[   71.295686] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016
+[   71.304037] RIP: 0010:tcindex_delete+0x72/0x280 [cls_tcindex]
+[   71.310446] Code: 00 31 f6 48 87 75 20 48 85 f6 74 11 48 8b 47 18 48 8b 40 08 48 8b 40 50 e8 fb a6 f8 fc 48 85 db 0f 84 dc 00 00 00 48 8b 73 18 <8b> 56 04 48 8d 7e 04 85 d2 0f 84 7b 01 00
+[   71.331517] RSP: 0018:ffffb45207b3f898 EFLAGS: 00010282
+[   71.337345] RAX: ffff8ad3d72d6360 RBX: ffff8acc84393680 RCX: 000000000000002e
+[   71.345306] RDX: ffff8ad3d72c8570 RSI: 0000000000000000 RDI: ffff8ad847a45800
+[   71.353277] RBP: ffff8acc84393688 R08: ffff8ad3d72c8400 R09: 0000000000000000
+[   71.361238] R10: ffff8ad3de786e00 R11: 0000000000000000 R12: ffffb45207b3f8c7
+[   71.369199] R13: ffff8ad3d93bd2a0 R14: 000000000000002e R15: ffff8ad3d72c9600
+[   71.377161] FS:  00007f9d3ec3e740(0000) GS:ffff8ad3df980000(0000) knlGS:0000000000000000
+[   71.386188] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   71.392597] CR2: 0000000000000004 CR3: 0000000852f06003 CR4: 00000000001606e0
+[   71.400558] Call Trace:
+[   71.403299]  tcindex_destroy_element+0x25/0x40 [cls_tcindex]
+[   71.409611]  tcindex_walk+0xbb/0x110 [cls_tcindex]
+[   71.414953]  tcindex_destroy+0x44/0x90 [cls_tcindex]
+[   71.420492]  ? tcindex_delete+0x280/0x280 [cls_tcindex]
+[   71.426323]  tcf_proto_destroy+0x16/0x40
+[   71.430696]  tcf_chain_flush+0x51/0x70
+[   71.434876]  tcf_block_put_ext.part.30+0x8f/0x1b0
+[   71.440122]  tcf_block_put+0x4d/0x70
+[   71.444108]  cbq_destroy+0x4d/0xd0 [sch_cbq]
+[   71.448869]  qdisc_destroy+0x62/0x130
+[   71.452951]  dsmark_destroy+0x2a/0x70 [sch_dsmark]
+[   71.458300]  qdisc_destroy+0x62/0x130
+[   71.462373]  qdisc_graft+0x3ba/0x470
+[   71.466359]  tc_get_qdisc+0x2a6/0x2c0
+[   71.470443]  ? cred_has_capability+0x7d/0x130
+[   71.475307]  rtnetlink_rcv_msg+0x263/0x2d0
+[   71.479875]  ? rtnl_calcit.isra.30+0x110/0x110
+[   71.484832]  netlink_rcv_skb+0x4d/0x130
+[   71.489109]  netlink_unicast+0x1a3/0x250
+[   71.493482]  netlink_sendmsg+0x2ae/0x3a0
+[   71.497859]  sock_sendmsg+0x36/0x40
+[   71.501748]  ___sys_sendmsg+0x26f/0x2d0
+[   71.506029]  ? handle_pte_fault+0x586/0xdf0
+[   71.510694]  ? __handle_mm_fault+0x389/0x500
+[   71.515457]  ? __sys_sendmsg+0x5e/0xa0
+[   71.519636]  __sys_sendmsg+0x5e/0xa0
+[   71.523626]  do_syscall_64+0x5b/0x180
+[   71.527711]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[   71.533345] RIP: 0033:0x7f9d3e257f10
+[   71.537331] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8
+[   71.558401] RSP: 002b:00007fff6f893398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+[   71.566848] RAX: ffffffffffffffda RBX: 000000005b71274d RCX: 00007f9d3e257f10
+[   71.574810] RDX: 0000000000000000 RSI: 00007fff6f8933e0 RDI: 0000000000000003
+[   71.582770] RBP: 00007fff6f8933e0 R08: 000000000000ffff R09: 0000000000000003
+[   71.590729] R10: 00007fff6f892e20 R11: 0000000000000246 R12: 0000000000000000
+[   71.598689] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000
+[   71.606651] Modules linked in: sch_cbq cls_tcindex sch_dsmark xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_coni
+[   71.685425]  libahci i2c_algo_bit i2c_core i40e libata dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod
+[   71.697075] CR2: 0000000000000004
+[   71.700792] ---[ end trace f604eb1acacd978b ]---
+
+Reproducer:
+tc qdisc add dev lo handle 1:0 root dsmark indices 64 set_tc_index
+tc filter add dev lo parent 1:0 protocol ip prio 1 tcindex mask 0xfc shift 2
+tc qdisc add dev lo parent 1:0 handle 2:0 cbq bandwidth 10Mbit cell 8 avpkt 1000 mpu 64
+tc class add dev lo parent 2:0 classid 2:1 cbq bandwidth 10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated allot 1514 weight 1 maxburst 10
+tc filter add dev lo parent 2:0 protocol ip prio 1 handle 0x2e tcindex classid 2:1 pass_on
+tc qdisc add dev lo parent 2:1 pfifo limit 5
+tc qdisc del dev lo root
+
+This is because in tcindex_set_parms(), when there is no old_r, we set the new
+exts on cr.exts, and we never set it on the filter when r == &new_filter_result.
+
+Then in tcindex_delete() -> tcf_exts_get_net(), we get a NULL pointer
+dereference because exts was never initialized.
+
+Fix it by moving tcf_exts_change() after "if (old_r && old_r != r)" check.
+Then we don't need "cr" as there is no errout after that.
+
+Fixes: bf63ac73b3e13 ("net_sched: fix an oops in tcindex filter")
+Reported-by: Li Shuang <shuali@redhat.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_tcindex.c |    7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/net/sched/cls_tcindex.c
++++ b/net/sched/cls_tcindex.c
+@@ -468,11 +468,6 @@ tcindex_set_parms(struct net *net, struc
+               tcf_bind_filter(tp, &cr.res, base);
+       }
+-      if (old_r)
+-              tcf_exts_change(&r->exts, &e);
+-      else
+-              tcf_exts_change(&cr.exts, &e);
+-
+       if (old_r && old_r != r) {
+               err = tcindex_filter_result_init(old_r);
+               if (err < 0) {
+@@ -483,6 +478,8 @@ tcindex_set_parms(struct net *net, struc
+       oldp = p;
+       r->res = cr.res;
++      tcf_exts_change(&r->exts, &e);
++
+       rcu_assign_pointer(tp->root, cp);
+       if (r == &new_filter_result) {
diff --git a/queue-4.17/r8169-don-t-use-msi-x-on-rtl8168g.patch b/queue-4.17/r8169-don-t-use-msi-x-on-rtl8168g.patch
new file mode 100644 (file)
index 0000000..8c91c47
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Heiner Kallweit <hkallweit1@gmail.com>
+Date: Sun, 12 Aug 2018 13:26:26 +0200
+Subject: r8169: don't use MSI-X on RTL8168g
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+[ Upstream commit 7c53a722459c1d6ffb0f5b2058c06ca8980b8600 ]
+
+There have been two reports that network doesn't come back on resume
+from suspend when using MSI-X. Both cases affect the same chip version
+(RTL8168g - version 40), on different systems. Falling back to MSI
+fixes the issue.
+Even though we don't really have a proof yet that the network chip
+version is to blame, let's disable MSI-X for this version.
+
+Reported-by: Steve Dodd <steved424@gmail.com>
+Reported-by: Lou Reed <gogen@disroot.org>
+Tested-by: Steve Dodd <steved424@gmail.com>
+Tested-by: Lou Reed <gogen@disroot.org>
+Fixes: 6c6aa15fdea5 ("r8169: improve interrupt handling")
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/ethernet/realtek/r8169.c
++++ b/drivers/net/ethernet/realtek/r8169.c
+@@ -8066,6 +8066,11 @@ static int rtl_alloc_irq(struct rtl8169_
+               RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
+               RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+               flags = PCI_IRQ_LEGACY;
++      } else if (tp->mac_version == RTL_GIGA_MAC_VER_40) {
++              /* This version was reported to have issues with resume
++               * from suspend when using MSI-X
++               */
++              flags = PCI_IRQ_LEGACY | PCI_IRQ_MSI;
+       } else {
+               flags = PCI_IRQ_ALL_TYPES;
+       }
diff --git a/queue-4.17/rxrpc-fix-the-keepalive-generator.patch b/queue-4.17/rxrpc-fix-the-keepalive-generator.patch
new file mode 100644 (file)
index 0000000..878bf6d
--- /dev/null
@@ -0,0 +1,454 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: David Howells <dhowells@redhat.com>
+Date: Wed, 8 Aug 2018 11:30:02 +0100
+Subject: rxrpc: Fix the keepalive generator [ver #2]
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 330bdcfadceea5e9a1526d731711e163f9a90975 ]
+
+AF_RXRPC has a keepalive message generator that generates a message for a
+peer ~20s after the last transmission to that peer to keep firewall ports
+open.  The implementation is incorrect in the following ways:
+
+ (1) It mixes up ktime_t and time64_t types.
+
+ (2) It uses ktime_get_real(), the output of which may jump forward or
+     backward due to adjustments to the time of day.
+
+ (3) If the current time jumps forward too much or jumps backwards, the
+     generator function will crank the base of the time ring round one slot
+     at a time (ie. a 1s period) until it catches up, spewing out VERSION
+     packets as it goes.
+
+Fix the problem by:
+
+ (1) Only using time64_t.  There's no need for sub-second resolution.
+
+ (2) Use ktime_get_seconds() rather than ktime_get_real() so that time
+     isn't perceived to go backwards.
+
+ (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two
+     parts:
+
+     (a) The "worker" function that manages the buckets and the timer.
+
+     (b) The "dispatch" function that takes the pending peers and
+        potentially transmits a keepalive packet before putting them back
+        in the ring into the slot appropriate to the revised last-Tx time.
+
+ (4) Taking everything that's pending out of the ring and splicing it into
+     a temporary collector list for processing.
+
+     In the case that there's been a significant jump forward, the ring
+     gets entirely emptied and then the time base can be warped forward
+     before the peers are processed.
+
+     The warping can't happen if the ring isn't empty because the slot a
+     peer is in is keepalive-time dependent, relative to the base time.
+
+ (5) Limit the number of iterations of the bucket array when scanning it.
+
+ (6) Set the timer to skip any empty slots as there's no point waking up if
+     there's nothing to do yet.
+
+This can be triggered by an incoming call from a server after a reboot with
+AF_RXRPC and AFS built into the kernel causing a peer record to be set up
+before userspace is started.  The system clock is then adjusted by
+userspace, thereby potentially causing the keepalive generator to have a
+meltdown - which leads to a message like:
+
+       watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23]
+       ...
+       Workqueue: krxrpcd rxrpc_peer_keepalive_worker
+       EIP: lock_acquire+0x69/0x80
+       ...
+       Call Trace:
+        ? rxrpc_peer_keepalive_worker+0x5e/0x350
+        ? _raw_spin_lock_bh+0x29/0x60
+        ? rxrpc_peer_keepalive_worker+0x5e/0x350
+        ? rxrpc_peer_keepalive_worker+0x5e/0x350
+        ? __lock_acquire+0x3d3/0x870
+        ? process_one_work+0x110/0x340
+        ? process_one_work+0x166/0x340
+        ? process_one_work+0x110/0x340
+        ? worker_thread+0x39/0x3c0
+        ? kthread+0xdb/0x110
+        ? cancel_delayed_work+0x90/0x90
+        ? kthread_stop+0x70/0x70
+        ? ret_from_fork+0x19/0x24
+
+Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive")
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rxrpc/ar-internal.h |    8 +-
+ net/rxrpc/conn_event.c  |    4 -
+ net/rxrpc/net_ns.c      |    6 -
+ net/rxrpc/output.c      |   12 +--
+ net/rxrpc/peer_event.c  |  162 ++++++++++++++++++++++++++----------------------
+ net/rxrpc/peer_object.c |    8 +-
+ net/rxrpc/rxkad.c       |    4 -
+ 7 files changed, 112 insertions(+), 92 deletions(-)
+
+--- a/net/rxrpc/ar-internal.h
++++ b/net/rxrpc/ar-internal.h
+@@ -104,9 +104,9 @@ struct rxrpc_net {
+ #define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */
+       u8                      peer_keepalive_cursor;
+-      ktime_t                 peer_keepalive_base;
+-      struct hlist_head       peer_keepalive[RXRPC_KEEPALIVE_TIME + 1];
+-      struct hlist_head       peer_keepalive_new;
++      time64_t                peer_keepalive_base;
++      struct list_head        peer_keepalive[32];
++      struct list_head        peer_keepalive_new;
+       struct timer_list       peer_keepalive_timer;
+       struct work_struct      peer_keepalive_work;
+ };
+@@ -295,7 +295,7 @@ struct rxrpc_peer {
+       struct hlist_head       error_targets;  /* targets for net error distribution */
+       struct work_struct      error_distributor;
+       struct rb_root          service_conns;  /* Service connections */
+-      struct hlist_node       keepalive_link; /* Link in net->peer_keepalive[] */
++      struct list_head        keepalive_link; /* Link in net->peer_keepalive[] */
+       time64_t                last_tx_at;     /* Last time packet sent here */
+       seqlock_t               service_conn_lock;
+       spinlock_t              lock;           /* access lock */
+--- a/net/rxrpc/conn_event.c
++++ b/net/rxrpc/conn_event.c
+@@ -136,7 +136,7 @@ static void rxrpc_conn_retransmit_call(s
+       }
+       ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
+-      conn->params.peer->last_tx_at = ktime_get_real();
++      conn->params.peer->last_tx_at = ktime_get_seconds();
+       if (ret < 0)
+               trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+                                   rxrpc_tx_fail_call_final_resend);
+@@ -245,7 +245,7 @@ static int rxrpc_abort_connection(struct
+               return -EAGAIN;
+       }
+-      conn->params.peer->last_tx_at = ktime_get_real();
++      conn->params.peer->last_tx_at = ktime_get_seconds();
+       _leave(" = 0");
+       return 0;
+--- a/net/rxrpc/net_ns.c
++++ b/net/rxrpc/net_ns.c
+@@ -85,12 +85,12 @@ static __net_init int rxrpc_init_net(str
+       hash_init(rxnet->peer_hash);
+       spin_lock_init(&rxnet->peer_hash_lock);
+       for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++)
+-              INIT_HLIST_HEAD(&rxnet->peer_keepalive[i]);
+-      INIT_HLIST_HEAD(&rxnet->peer_keepalive_new);
++              INIT_LIST_HEAD(&rxnet->peer_keepalive[i]);
++      INIT_LIST_HEAD(&rxnet->peer_keepalive_new);
+       timer_setup(&rxnet->peer_keepalive_timer,
+                   rxrpc_peer_keepalive_timeout, 0);
+       INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker);
+-      rxnet->peer_keepalive_base = ktime_add(ktime_get_real(), NSEC_PER_SEC);
++      rxnet->peer_keepalive_base = ktime_get_seconds();
+       ret = -ENOMEM;
+       rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net);
+--- a/net/rxrpc/output.c
++++ b/net/rxrpc/output.c
+@@ -209,7 +209,7 @@ int rxrpc_send_ack_packet(struct rxrpc_c
+       now = ktime_get_real();
+       if (ping)
+               call->ping_time = now;
+-      conn->params.peer->last_tx_at = ktime_get_real();
++      conn->params.peer->last_tx_at = ktime_get_seconds();
+       if (ret < 0)
+               trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+                                   rxrpc_tx_fail_call_ack);
+@@ -296,7 +296,7 @@ int rxrpc_send_abort_packet(struct rxrpc
+       ret = kernel_sendmsg(conn->params.local->socket,
+                            &msg, iov, 1, sizeof(pkt));
+-      conn->params.peer->last_tx_at = ktime_get_real();
++      conn->params.peer->last_tx_at = ktime_get_seconds();
+       if (ret < 0)
+               trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+                                   rxrpc_tx_fail_call_abort);
+@@ -391,7 +391,7 @@ int rxrpc_send_data_packet(struct rxrpc_
+        *     message and update the peer record
+        */
+       ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+-      conn->params.peer->last_tx_at = ktime_get_real();
++      conn->params.peer->last_tx_at = ktime_get_seconds();
+       up_read(&conn->params.local->defrag_sem);
+       if (ret < 0)
+@@ -457,7 +457,7 @@ send_fragmentable:
+               if (ret == 0) {
+                       ret = kernel_sendmsg(conn->params.local->socket, &msg,
+                                            iov, 2, len);
+-                      conn->params.peer->last_tx_at = ktime_get_real();
++                      conn->params.peer->last_tx_at = ktime_get_seconds();
+                       opt = IP_PMTUDISC_DO;
+                       kernel_setsockopt(conn->params.local->socket, SOL_IP,
+@@ -475,7 +475,7 @@ send_fragmentable:
+               if (ret == 0) {
+                       ret = kernel_sendmsg(conn->params.local->socket, &msg,
+                                            iov, 2, len);
+-                      conn->params.peer->last_tx_at = ktime_get_real();
++                      conn->params.peer->last_tx_at = ktime_get_seconds();
+                       opt = IPV6_PMTUDISC_DO;
+                       kernel_setsockopt(conn->params.local->socket,
+@@ -599,6 +599,6 @@ void rxrpc_send_keepalive(struct rxrpc_p
+               trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
+                                   rxrpc_tx_fail_version_keepalive);
+-      peer->last_tx_at = ktime_get_real();
++      peer->last_tx_at = ktime_get_seconds();
+       _leave("");
+ }
+--- a/net/rxrpc/peer_event.c
++++ b/net/rxrpc/peer_event.c
+@@ -350,97 +350,117 @@ void rxrpc_peer_add_rtt(struct rxrpc_cal
+ }
+ /*
+- * Perform keep-alive pings with VERSION packets to keep any NAT alive.
++ * Perform keep-alive pings.
+  */
+-void rxrpc_peer_keepalive_worker(struct work_struct *work)
++static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
++                                        struct list_head *collector,
++                                        time64_t base,
++                                        u8 cursor)
+ {
+-      struct rxrpc_net *rxnet =
+-              container_of(work, struct rxrpc_net, peer_keepalive_work);
+       struct rxrpc_peer *peer;
+-      unsigned long delay;
+-      ktime_t base, now = ktime_get_real();
+-      s64 diff;
+-      u8 cursor, slot;
++      const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
++      time64_t keepalive_at;
++      int slot;
+-      base = rxnet->peer_keepalive_base;
+-      cursor = rxnet->peer_keepalive_cursor;
+-
+-      _enter("%u,%lld", cursor, ktime_sub(now, base));
++      spin_lock_bh(&rxnet->peer_hash_lock);
+-next_bucket:
+-      diff = ktime_to_ns(ktime_sub(now, base));
+-      if (diff < 0)
+-              goto resched;
++      while (!list_empty(collector)) {
++              peer = list_entry(collector->next,
++                                struct rxrpc_peer, keepalive_link);
++
++              list_del_init(&peer->keepalive_link);
++              if (!rxrpc_get_peer_maybe(peer))
++                      continue;
+-      _debug("at %u", cursor);
+-      spin_lock_bh(&rxnet->peer_hash_lock);
+-next_peer:
+-      if (!rxnet->live) {
+               spin_unlock_bh(&rxnet->peer_hash_lock);
+-              goto out;
+-      }
+-      /* Everything in the bucket at the cursor is processed this second; the
+-       * bucket at cursor + 1 goes now + 1s and so on...
+-       */
+-      if (hlist_empty(&rxnet->peer_keepalive[cursor])) {
+-              if (hlist_empty(&rxnet->peer_keepalive_new)) {
+-                      spin_unlock_bh(&rxnet->peer_hash_lock);
+-                      goto emptied_bucket;
++              keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
++              slot = keepalive_at - base;
++              _debug("%02x peer %u t=%d {%pISp}",
++                     cursor, peer->debug_id, slot, &peer->srx.transport);
++
++              if (keepalive_at <= base ||
++                  keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
++                      rxrpc_send_keepalive(peer);
++                      slot = RXRPC_KEEPALIVE_TIME;
+               }
+-              hlist_move_list(&rxnet->peer_keepalive_new,
+-                              &rxnet->peer_keepalive[cursor]);
++              /* A transmission to this peer occurred since last we examined
++               * it so put it into the appropriate future bucket.
++               */
++              slot += cursor;
++              slot &= mask;
++              spin_lock_bh(&rxnet->peer_hash_lock);
++              list_add_tail(&peer->keepalive_link,
++                            &rxnet->peer_keepalive[slot & mask]);
++              rxrpc_put_peer(peer);
+       }
+-      peer = hlist_entry(rxnet->peer_keepalive[cursor].first,
+-                         struct rxrpc_peer, keepalive_link);
+-      hlist_del_init(&peer->keepalive_link);
+-      if (!rxrpc_get_peer_maybe(peer))
+-              goto next_peer;
+-
+       spin_unlock_bh(&rxnet->peer_hash_lock);
++}
++
++/*
++ * Perform keep-alive pings with VERSION packets to keep any NAT alive.
++ */
++void rxrpc_peer_keepalive_worker(struct work_struct *work)
++{
++      struct rxrpc_net *rxnet =
++              container_of(work, struct rxrpc_net, peer_keepalive_work);
++      const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
++      time64_t base, now, delay;
++      u8 cursor, stop;
++      LIST_HEAD(collector);
+-      _debug("peer %u {%pISp}", peer->debug_id, &peer->srx.transport);
++      now = ktime_get_seconds();
++      base = rxnet->peer_keepalive_base;
++      cursor = rxnet->peer_keepalive_cursor;
++      _enter("%lld,%u", base - now, cursor);
+-recalc:
+-      diff = ktime_divns(ktime_sub(peer->last_tx_at, base), NSEC_PER_SEC);
+-      if (diff < -30 || diff > 30)
+-              goto send; /* LSW of 64-bit time probably wrapped on 32-bit */
+-      diff += RXRPC_KEEPALIVE_TIME - 1;
+-      if (diff < 0)
+-              goto send;
+-
+-      slot = (diff > RXRPC_KEEPALIVE_TIME - 1) ? RXRPC_KEEPALIVE_TIME - 1 : diff;
+-      if (slot == 0)
+-              goto send;
++      if (!rxnet->live)
++              return;
+-      /* A transmission to this peer occurred since last we examined it so
+-       * put it into the appropriate future bucket.
++      /* Remove to a temporary list all the peers that are currently lodged
++       * in expired buckets plus all new peers.
++       *
++       * Everything in the bucket at the cursor is processed this
++       * second; the bucket at cursor + 1 goes at now + 1s and so
++       * on...
+        */
+-      slot = (slot + cursor) % ARRAY_SIZE(rxnet->peer_keepalive);
+       spin_lock_bh(&rxnet->peer_hash_lock);
+-      hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive[slot]);
+-      rxrpc_put_peer(peer);
+-      goto next_peer;
+-
+-send:
+-      rxrpc_send_keepalive(peer);
+-      now = ktime_get_real();
+-      goto recalc;
+-
+-emptied_bucket:
+-      cursor++;
+-      if (cursor >= ARRAY_SIZE(rxnet->peer_keepalive))
+-              cursor = 0;
+-      base = ktime_add_ns(base, NSEC_PER_SEC);
+-      goto next_bucket;
++      list_splice_init(&rxnet->peer_keepalive_new, &collector);
++
++      stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
++      while (base <= now && (s8)(cursor - stop) < 0) {
++              list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
++                                    &collector);
++              base++;
++              cursor++;
++      }
++
++      base = now;
++      spin_unlock_bh(&rxnet->peer_hash_lock);
+-resched:
+       rxnet->peer_keepalive_base = base;
+       rxnet->peer_keepalive_cursor = cursor;
+-      delay = nsecs_to_jiffies(-diff) + 1;
+-      timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
+-out:
++      rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
++      ASSERT(list_empty(&collector));
++
++      /* Schedule the timer for the next occupied timeslot. */
++      cursor = rxnet->peer_keepalive_cursor;
++      stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
++      for (; (s8)(cursor - stop) < 0; cursor++) {
++              if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
++                      break;
++              base++;
++      }
++
++      now = ktime_get_seconds();
++      delay = base - now;
++      if (delay < 1)
++              delay = 1;
++      delay *= HZ;
++      if (rxnet->live)
++              timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
++
+       _leave("");
+ }
+--- a/net/rxrpc/peer_object.c
++++ b/net/rxrpc/peer_object.c
+@@ -322,7 +322,7 @@ struct rxrpc_peer *rxrpc_lookup_incoming
+       if (!peer) {
+               peer = prealloc;
+               hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
+-              hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive_new);
++              list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new);
+       }
+       spin_unlock(&rxnet->peer_hash_lock);
+@@ -367,8 +367,8 @@ struct rxrpc_peer *rxrpc_lookup_peer(str
+               if (!peer) {
+                       hash_add_rcu(rxnet->peer_hash,
+                                    &candidate->hash_link, hash_key);
+-                      hlist_add_head(&candidate->keepalive_link,
+-                                     &rxnet->peer_keepalive_new);
++                      list_add_tail(&candidate->keepalive_link,
++                                    &rxnet->peer_keepalive_new);
+               }
+               spin_unlock_bh(&rxnet->peer_hash_lock);
+@@ -441,7 +441,7 @@ static void __rxrpc_put_peer(struct rxrp
+       spin_lock_bh(&rxnet->peer_hash_lock);
+       hash_del_rcu(&peer->hash_link);
+-      hlist_del_init(&peer->keepalive_link);
++      list_del_init(&peer->keepalive_link);
+       spin_unlock_bh(&rxnet->peer_hash_lock);
+       kfree_rcu(peer, rcu);
+--- a/net/rxrpc/rxkad.c
++++ b/net/rxrpc/rxkad.c
+@@ -669,7 +669,7 @@ static int rxkad_issue_challenge(struct
+               return -EAGAIN;
+       }
+-      conn->params.peer->last_tx_at = ktime_get_real();
++      conn->params.peer->last_tx_at = ktime_get_seconds();
+       _leave(" = 0");
+       return 0;
+ }
+@@ -725,7 +725,7 @@ static int rxkad_send_response(struct rx
+               return -EAGAIN;
+       }
+-      conn->params.peer->last_tx_at = ktime_get_real();
++      conn->params.peer->last_tx_at = ktime_get_seconds();
+       _leave(" = 0");
+       return 0;
+ }
diff --git a/queue-4.17/series b/queue-4.17/series
new file mode 100644 (file)
index 0000000..75f7d82
--- /dev/null
@@ -0,0 +1,16 @@
+dccp-fix-undefined-behavior-with-cwnd-shift-in-ccid2_cwnd_restart.patch
+l2tp-use-sk_dst_check-to-avoid-race-on-sk-sk_dst_cache.patch
+llc-use-refcount_inc_not_zero-for-llc_sap_find.patch
+net_sched-fix-null-pointer-dereference-when-delete-tcindex-filter.patch
+vsock-split-dwork-to-avoid-reinitializations.patch
+net_sched-fix-missing-res-info-when-create-new-tc_index-filter.patch
+vhost-reset-metadata-cache-when-initializing-new-iotlb.patch
+mlxsw-core_acl_flex_actions-return-error-for-conflicting-actions.patch
+net-aquantia-fix-iff_allmulti-flag-functionality.patch
+ip6_tunnel-use-the-right-value-for-ipv4-min-mtu-check-in-ip6_tnl_xmit.patch
+mlxsw-core_acl_flex_actions-remove-redundant-resource-destruction.patch
+mlxsw-core_acl_flex_actions-remove-redundant-counter-destruction.patch
+mlxsw-core_acl_flex_actions-remove-redundant-mirror-resource-destruction.patch
+net-mlx5e-properly-check-if-hairpin-is-possible-between-two-functions.patch
+r8169-don-t-use-msi-x-on-rtl8168g.patch
+rxrpc-fix-the-keepalive-generator.patch
diff --git a/queue-4.17/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch b/queue-4.17/vhost-reset-metadata-cache-when-initializing-new-iotlb.patch
new file mode 100644 (file)
index 0000000..f6c3f84
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Jason Wang <jasowang@redhat.com>
+Date: Wed, 8 Aug 2018 11:43:04 +0800
+Subject: vhost: reset metadata cache when initializing new IOTLB
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit b13f9c6364373a1b9f71e9846dc4fb199296f926 ]
+
+We need to reset metadata cache during new IOTLB initialization,
+otherwise stale pointers to the previous IOTLB may still be accessed,
+which will lead to a use after free.
+
+Reported-by: syzbot+c51e6736a1bf614b3272@syzkaller.appspotmail.com
+Fixes: f88949138058 ("vhost: introduce O(1) vq metadata cache")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/vhost.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -1556,9 +1556,12 @@ int vhost_init_device_iotlb(struct vhost
+       d->iotlb = niotlb;
+       for (i = 0; i < d->nvqs; ++i) {
+-              mutex_lock(&d->vqs[i]->mutex);
+-              d->vqs[i]->iotlb = niotlb;
+-              mutex_unlock(&d->vqs[i]->mutex);
++              struct vhost_virtqueue *vq = d->vqs[i];
++
++              mutex_lock(&vq->mutex);
++              vq->iotlb = niotlb;
++              __vhost_vq_meta_reset(vq);
++              mutex_unlock(&vq->mutex);
+       }
+       vhost_umem_clean(oiotlb);
diff --git a/queue-4.17/vsock-split-dwork-to-avoid-reinitializations.patch b/queue-4.17/vsock-split-dwork-to-avoid-reinitializations.patch
new file mode 100644 (file)
index 0000000..3cbb10e
--- /dev/null
@@ -0,0 +1,134 @@
+From foo@baz Sat Aug 18 11:41:41 CEST 2018
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Mon, 6 Aug 2018 11:06:02 -0700
+Subject: vsock: split dwork to avoid reinitializations
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 455f05ecd2b219e9a216050796d30c830d9bc393 ]
+
+syzbot reported that we reinitialize an active delayed
+work in vsock_stream_connect():
+
+       ODEBUG: init active (active state 0) object type: timer_list hint:
+       delayed_work_timer_fn+0x0/0x90 kernel/workqueue.c:1414
+       WARNING: CPU: 1 PID: 11518 at lib/debugobjects.c:329
+       debug_print_object+0x16a/0x210 lib/debugobjects.c:326
+
+The pattern is apparently wrong: we should initialize the
+delayed work only once and can then schedule it repeatedly. So we
+have to move the initializations out to the allocation side.
+And to avoid confusion, we can split the shared dwork
+into two, instead of re-using the same one.
+
+Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
+Reported-by: <syzbot+8a9b1bd330476a4f3db6@syzkaller.appspotmail.com>
+Cc: Andy king <acking@vmware.com>
+Cc: Stefan Hajnoczi <stefanha@redhat.com>
+Cc: Jorgen Hansen <jhansen@vmware.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/af_vsock.h         |    4 ++--
+ net/vmw_vsock/af_vsock.c       |   15 ++++++++-------
+ net/vmw_vsock/vmci_transport.c |    3 +--
+ 3 files changed, 11 insertions(+), 11 deletions(-)
+
+--- a/include/net/af_vsock.h
++++ b/include/net/af_vsock.h
+@@ -64,7 +64,8 @@ struct vsock_sock {
+       struct list_head pending_links;
+       struct list_head accept_queue;
+       bool rejected;
+-      struct delayed_work dwork;
++      struct delayed_work connect_work;
++      struct delayed_work pending_work;
+       struct delayed_work close_work;
+       bool close_work_scheduled;
+       u32 peer_shutdown;
+@@ -77,7 +78,6 @@ struct vsock_sock {
+ s64 vsock_stream_has_data(struct vsock_sock *vsk);
+ s64 vsock_stream_has_space(struct vsock_sock *vsk);
+-void vsock_pending_work(struct work_struct *work);
+ struct sock *__vsock_create(struct net *net,
+                           struct socket *sock,
+                           struct sock *parent,
+--- a/net/vmw_vsock/af_vsock.c
++++ b/net/vmw_vsock/af_vsock.c
+@@ -451,14 +451,14 @@ static int vsock_send_shutdown(struct so
+       return transport->shutdown(vsock_sk(sk), mode);
+ }
+-void vsock_pending_work(struct work_struct *work)
++static void vsock_pending_work(struct work_struct *work)
+ {
+       struct sock *sk;
+       struct sock *listener;
+       struct vsock_sock *vsk;
+       bool cleanup;
+-      vsk = container_of(work, struct vsock_sock, dwork.work);
++      vsk = container_of(work, struct vsock_sock, pending_work.work);
+       sk = sk_vsock(vsk);
+       listener = vsk->listener;
+       cleanup = true;
+@@ -498,7 +498,6 @@ out:
+       sock_put(sk);
+       sock_put(listener);
+ }
+-EXPORT_SYMBOL_GPL(vsock_pending_work);
+ /**** SOCKET OPERATIONS ****/
+@@ -597,6 +596,8 @@ static int __vsock_bind(struct sock *sk,
+       return retval;
+ }
++static void vsock_connect_timeout(struct work_struct *work);
++
+ struct sock *__vsock_create(struct net *net,
+                           struct socket *sock,
+                           struct sock *parent,
+@@ -638,6 +639,8 @@ struct sock *__vsock_create(struct net *
+       vsk->sent_request = false;
+       vsk->ignore_connecting_rst = false;
+       vsk->peer_shutdown = 0;
++      INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout);
++      INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work);
+       psk = parent ? vsock_sk(parent) : NULL;
+       if (parent) {
+@@ -1117,7 +1120,7 @@ static void vsock_connect_timeout(struct
+       struct vsock_sock *vsk;
+       int cancel = 0;
+-      vsk = container_of(work, struct vsock_sock, dwork.work);
++      vsk = container_of(work, struct vsock_sock, connect_work.work);
+       sk = sk_vsock(vsk);
+       lock_sock(sk);
+@@ -1221,9 +1224,7 @@ static int vsock_stream_connect(struct s
+                        * timeout fires.
+                        */
+                       sock_hold(sk);
+-                      INIT_DELAYED_WORK(&vsk->dwork,
+-                                        vsock_connect_timeout);
+-                      schedule_delayed_work(&vsk->dwork, timeout);
++                      schedule_delayed_work(&vsk->connect_work, timeout);
+                       /* Skip ahead to preserve error code set above. */
+                       goto out_wait;
+--- a/net/vmw_vsock/vmci_transport.c
++++ b/net/vmw_vsock/vmci_transport.c
+@@ -1094,8 +1094,7 @@ static int vmci_transport_recv_listen(st
+       vpending->listener = sk;
+       sock_hold(sk);
+       sock_hold(pending);
+-      INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
+-      schedule_delayed_work(&vpending->dwork, HZ);
++      schedule_delayed_work(&vpending->pending_work, HZ);
+ out:
+       return err;