]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 16 Mar 2019 08:26:20 +0000 (01:26 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 16 Mar 2019 08:26:20 +0000 (01:26 -0700)
added patches:
gro_cells-make-sure-device-is-up-in-gro_cells_receive.patch
ipvlan-disallow-userns-cap_net_admin-to-change-global-mode-flags.patch
l2tp-fix-infoleak-in-l2tp_ip6_recvmsg.patch
mdio_bus-fix-use-after-free-on-device_register-fails.patch
missing-barriers-in-some-of-unix_sock-addr-and-path-accesses.patch
net-hsr-fix-memory-leak-in-hsr_dev_finalize.patch
net-hsr-fix-possible-crash-in-add_timer.patch
net-mlx4_core-fix-locking-in-sriov-mode-when-switching-between-events-and-polling.patch
net-mlx4_core-fix-qp-mtt-size-calculation.patch
net-mlx4_core-fix-reset-flow-when-in-command-polling-mode.patch
net-set-rtm_table-to-rt_table_compat-for-ipv6-for-tables-255.patch
net-sit-fix-ubsan-undefined-behaviour-in-check_6rd.patch
net-x25-fix-a-race-in-x25_bind.patch
net-x25-fix-use-after-free-in-x25_device_event.patch
net-x25-reset-state-in-x25_connect.patch
pptp-dst_release-sk_dst_cache-in-pptp_sock_destruct.patch
ravb-decrease-txfifo-depth-of-q3-and-q2-to-one.patch
route-set-the-deleted-fnhe-fnhe_daddr-to-0-in-ip_del_fnhe-to-fix-a-race.patch
rxrpc-fix-client-call-queueing-waiting-for-channel.patch
tcp-handle-inet_csk_reqsk_queue_add-failures.patch
vxlan-fix-gro-cells-race-condition-between-receive-and-link-delete.patch
vxlan-test-dev-flags-iff_up-before-calling-gro_cells_receive.patch

23 files changed:
queue-4.9/gro_cells-make-sure-device-is-up-in-gro_cells_receive.patch [new file with mode: 0644]
queue-4.9/ipvlan-disallow-userns-cap_net_admin-to-change-global-mode-flags.patch [new file with mode: 0644]
queue-4.9/l2tp-fix-infoleak-in-l2tp_ip6_recvmsg.patch [new file with mode: 0644]
queue-4.9/mdio_bus-fix-use-after-free-on-device_register-fails.patch [new file with mode: 0644]
queue-4.9/missing-barriers-in-some-of-unix_sock-addr-and-path-accesses.patch [new file with mode: 0644]
queue-4.9/net-hsr-fix-memory-leak-in-hsr_dev_finalize.patch [new file with mode: 0644]
queue-4.9/net-hsr-fix-possible-crash-in-add_timer.patch [new file with mode: 0644]
queue-4.9/net-mlx4_core-fix-locking-in-sriov-mode-when-switching-between-events-and-polling.patch [new file with mode: 0644]
queue-4.9/net-mlx4_core-fix-qp-mtt-size-calculation.patch [new file with mode: 0644]
queue-4.9/net-mlx4_core-fix-reset-flow-when-in-command-polling-mode.patch [new file with mode: 0644]
queue-4.9/net-set-rtm_table-to-rt_table_compat-for-ipv6-for-tables-255.patch [new file with mode: 0644]
queue-4.9/net-sit-fix-ubsan-undefined-behaviour-in-check_6rd.patch [new file with mode: 0644]
queue-4.9/net-x25-fix-a-race-in-x25_bind.patch [new file with mode: 0644]
queue-4.9/net-x25-fix-use-after-free-in-x25_device_event.patch [new file with mode: 0644]
queue-4.9/net-x25-reset-state-in-x25_connect.patch [new file with mode: 0644]
queue-4.9/pptp-dst_release-sk_dst_cache-in-pptp_sock_destruct.patch [new file with mode: 0644]
queue-4.9/ravb-decrease-txfifo-depth-of-q3-and-q2-to-one.patch [new file with mode: 0644]
queue-4.9/route-set-the-deleted-fnhe-fnhe_daddr-to-0-in-ip_del_fnhe-to-fix-a-race.patch [new file with mode: 0644]
queue-4.9/rxrpc-fix-client-call-queueing-waiting-for-channel.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/tcp-handle-inet_csk_reqsk_queue_add-failures.patch [new file with mode: 0644]
queue-4.9/vxlan-fix-gro-cells-race-condition-between-receive-and-link-delete.patch [new file with mode: 0644]
queue-4.9/vxlan-test-dev-flags-iff_up-before-calling-gro_cells_receive.patch [new file with mode: 0644]

diff --git a/queue-4.9/gro_cells-make-sure-device-is-up-in-gro_cells_receive.patch b/queue-4.9/gro_cells-make-sure-device-is-up-in-gro_cells_receive.patch
new file mode 100644 (file)
index 0000000..4d986b0
--- /dev/null
@@ -0,0 +1,130 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 10 Mar 2019 10:39:37 -0700
+Subject: gro_cells: make sure device is up in gro_cells_receive()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 2a5ff07a0eb945f291e361aa6f6becca8340ba46 ]
+
+We keep receiving syzbot reports [1] that show that tunnels do not play
+the rcu/IFF_UP rules properly.
+
+At device dismantle phase, gro_cells_destroy() will be called
+only after a full rcu grace period is observed after IFF_UP
+has been cleared.
+
+This means that IFF_UP needs to be tested before queueing packets
+into netif_rx() or gro_cells.
+
+This patch implements the test in gro_cells_receive() because
+too many callers do not seem to bother enough.
+
+[1]
+BUG: unable to handle kernel paging request at fffff4ca0b9ffffe
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP KASAN
+CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.0.0+ #97
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: netns cleanup_net
+RIP: 0010:__skb_unlink include/linux/skbuff.h:1929 [inline]
+RIP: 0010:__skb_dequeue include/linux/skbuff.h:1945 [inline]
+RIP: 0010:__skb_queue_purge include/linux/skbuff.h:2656 [inline]
+RIP: 0010:gro_cells_destroy net/core/gro_cells.c:89 [inline]
+RIP: 0010:gro_cells_destroy+0x19d/0x360 net/core/gro_cells.c:78
+Code: 03 42 80 3c 20 00 0f 85 53 01 00 00 48 8d 7a 08 49 8b 47 08 49 c7 07 00 00 00 00 48 89 f9 49 c7 47 08 00 00 00 00 48 c1 e9 03 <42> 80 3c 21 00 0f 85 10 01 00 00 48 89 c1 48 89 42 08 48 c1 e9 03
+RSP: 0018:ffff8880aa3f79a8 EFLAGS: 00010a02
+RAX: 00ffffffffffffe8 RBX: ffffe8ffffc64b70 RCX: 1ffff8ca0b9ffffe
+RDX: ffffc6505cffffe8 RSI: ffffffff858410ca RDI: ffffc6505cfffff0
+RBP: ffff8880aa3f7a08 R08: ffff8880aa3e8580 R09: fffffbfff1263645
+R10: fffffbfff1263644 R11: ffffffff8931b223 R12: dffffc0000000000
+R13: 0000000000000000 R14: ffffe8ffffc64b80 R15: ffffe8ffffc64b75
+kobject: 'loop2' (000000004bd7d84a): kobject_uevent_env
+FS:  0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: fffff4ca0b9ffffe CR3: 0000000094941000 CR4: 00000000001406f0
+Call Trace:
+kobject: 'loop2' (000000004bd7d84a): fill_kobj_path: path = '/devices/virtual/block/loop2'
+ ip_tunnel_dev_free+0x19/0x60 net/ipv4/ip_tunnel.c:1010
+ netdev_run_todo+0x51c/0x7d0 net/core/dev.c:8970
+ rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:116
+ ip_tunnel_delete_nets+0x423/0x5f0 net/ipv4/ip_tunnel.c:1124
+ vti_exit_batch_net+0x23/0x30 net/ipv4/ip_vti.c:495
+ ops_exit_list.isra.0+0x105/0x160 net/core/net_namespace.c:156
+ cleanup_net+0x3fb/0x960 net/core/net_namespace.c:551
+ process_one_work+0x98e/0x1790 kernel/workqueue.c:2173
+ worker_thread+0x98/0xe40 kernel/workqueue.c:2319
+ kthread+0x357/0x430 kernel/kthread.c:246
+ ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352
+Modules linked in:
+CR2: fffff4ca0b9ffffe
+   [ end trace 513fc9c1338d1cb3 ]
+RIP: 0010:__skb_unlink include/linux/skbuff.h:1929 [inline]
+RIP: 0010:__skb_dequeue include/linux/skbuff.h:1945 [inline]
+RIP: 0010:__skb_queue_purge include/linux/skbuff.h:2656 [inline]
+RIP: 0010:gro_cells_destroy net/core/gro_cells.c:89 [inline]
+RIP: 0010:gro_cells_destroy+0x19d/0x360 net/core/gro_cells.c:78
+Code: 03 42 80 3c 20 00 0f 85 53 01 00 00 48 8d 7a 08 49 8b 47 08 49 c7 07 00 00 00 00 48 89 f9 49 c7 47 08 00 00 00 00 48 c1 e9 03 <42> 80 3c 21 00 0f 85 10 01 00 00 48 89 c1 48 89 42 08 48 c1 e9 03
+RSP: 0018:ffff8880aa3f79a8 EFLAGS: 00010a02
+RAX: 00ffffffffffffe8 RBX: ffffe8ffffc64b70 RCX: 1ffff8ca0b9ffffe
+RDX: ffffc6505cffffe8 RSI: ffffffff858410ca RDI: ffffc6505cfffff0
+RBP: ffff8880aa3f7a08 R08: ffff8880aa3e8580 R09: fffffbfff1263645
+R10: fffffbfff1263644 R11: ffffffff8931b223 R12: dffffc0000000000
+kobject: 'loop3' (00000000e4ee57a6): kobject_uevent_env
+R13: 0000000000000000 R14: ffffe8ffffc64b80 R15: ffffe8ffffc64b75
+FS:  0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: fffff4ca0b9ffffe CR3: 0000000094941000 CR4: 00000000001406f0
+
+Fixes: c9e6bc644e55 ("net: add gro_cells infrastructure")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/gro_cells.h |   22 ++++++++++++++++++----
+ 1 file changed, 18 insertions(+), 4 deletions(-)
+
+--- a/include/net/gro_cells.h
++++ b/include/net/gro_cells.h
+@@ -18,22 +18,36 @@ static inline int gro_cells_receive(stru
+ {
+       struct gro_cell *cell;
+       struct net_device *dev = skb->dev;
++      int res;
+-      if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO))
+-              return netif_rx(skb);
++      rcu_read_lock();
++      if (unlikely(!(dev->flags & IFF_UP)))
++              goto drop;
++
++      if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) {
++              res = netif_rx(skb);
++              goto unlock;
++      }
+       cell = this_cpu_ptr(gcells->cells);
+       if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
++drop:
+               atomic_long_inc(&dev->rx_dropped);
+               kfree_skb(skb);
+-              return NET_RX_DROP;
++              res = NET_RX_DROP;
++              goto unlock;
+       }
+       __skb_queue_tail(&cell->napi_skbs, skb);
+       if (skb_queue_len(&cell->napi_skbs) == 1)
+               napi_schedule(&cell->napi);
+-      return NET_RX_SUCCESS;
++
++      res = NET_RX_SUCCESS;
++
++unlock:
++      rcu_read_unlock();
++      return res;
+ }
+ /* called under BH context */
diff --git a/queue-4.9/ipvlan-disallow-userns-cap_net_admin-to-change-global-mode-flags.patch b/queue-4.9/ipvlan-disallow-userns-cap_net_admin-to-change-global-mode-flags.patch
new file mode 100644 (file)
index 0000000..6c6609f
--- /dev/null
@@ -0,0 +1,134 @@
+From foo@baz Fri Mar 15 20:48:31 PDT 2019
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 20 Feb 2019 00:15:30 +0100
+Subject: ipvlan: disallow userns cap_net_admin to change global mode/flags
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 7cc9f7003a969d359f608ebb701d42cafe75b84a ]
+
+When running Docker with userns isolation e.g. --userns-remap="default"
+and spawning up some containers with CAP_NET_ADMIN under this realm, I
+noticed that link changes on ipvlan slave device inside that container
+can affect all devices from this ipvlan group which are in other net
+namespaces where the container should have no permission to make changes
+to, such as the init netns, for example.
+
+This effectively allows to undo ipvlan private mode and switch globally to
+bridge mode where slaves can communicate directly without going through
+hostns, or it allows to switch between global operation mode (l2/l3/l3s)
+for everyone bound to the given ipvlan master device. libnetwork plugin
+here is creating an ipvlan master and ipvlan slave in hostns and a slave
+each that is moved into the container's netns upon creation event.
+
+* In hostns:
+
+  # ip -d a
+  [...]
+  8: cilium_host@bond0: <BROADCAST,MULTICAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
+     link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535
+     ipvlan  mode l3 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
+     inet 10.41.0.1/32 scope link cilium_host
+       valid_lft forever preferred_lft forever
+  [...]
+
+* Spawn container & change ipvlan mode setting inside of it:
+
+  # docker run -dt --cap-add=NET_ADMIN --network cilium-net --name client -l app=test cilium/netperf
+  9fff485d69dcb5ce37c9e33ca20a11ccafc236d690105aadbfb77e4f4170879c
+
+  # docker exec -ti client ip -d a
+  [...]
+  10: cilium0@if4: <BROADCAST,MULTICAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
+      link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535
+      ipvlan  mode l3 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
+      inet 10.41.197.43/32 brd 10.41.197.43 scope global cilium0
+         valid_lft forever preferred_lft forever
+
+  # docker exec -ti client ip link change link cilium0 name cilium0 type ipvlan mode l2
+
+  # docker exec -ti client ip -d a
+  [...]
+  10: cilium0@if4: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
+      link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535
+      ipvlan  mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
+      inet 10.41.197.43/32 brd 10.41.197.43 scope global cilium0
+         valid_lft forever preferred_lft forever
+
+* In hostns (mode switched to l2):
+
+  # ip -d a
+  [...]
+  8: cilium_host@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
+      link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535
+      ipvlan  mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
+      inet 10.41.0.1/32 scope link cilium_host
+         valid_lft forever preferred_lft forever
+  [...]
+
+Same l3 -> l2 switch would also happen by creating another slave inside
+the container's network namespace when specifying the existing cilium0
+link to derive the actual (bond0) master:
+
+  # docker exec -ti client ip link add link cilium0 name cilium1 type ipvlan mode l2
+
+  # docker exec -ti client ip -d a
+  [...]
+  2: cilium1@if4: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
+      link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535
+      ipvlan  mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
+  10: cilium0@if4: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
+      link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535
+      ipvlan  mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
+      inet 10.41.197.43/32 brd 10.41.197.43 scope global cilium0
+         valid_lft forever preferred_lft forever
+
+* In hostns:
+
+  # ip -d a
+  [...]
+  8: cilium_host@bond0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
+      link/ether 0c:c4:7a:e1:3d:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535
+      ipvlan  mode l2 bridge numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
+      inet 10.41.0.1/32 scope link cilium_host
+         valid_lft forever preferred_lft forever
+  [...]
+
+One way to mitigate it is to check CAP_NET_ADMIN permissions of
+the ipvlan master device's ns, and only then allow to change
+mode or flags for all devices bound to it. Above two cases are
+then disallowed after the patch.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipvlan/ipvlan_main.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ipvlan/ipvlan_main.c
++++ b/drivers/net/ipvlan/ipvlan_main.c
+@@ -463,7 +463,12 @@ static int ipvlan_nl_changelink(struct n
+       struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
+       int err = 0;
+-      if (data && data[IFLA_IPVLAN_MODE]) {
++      if (!data)
++              return 0;
++      if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN))
++              return -EPERM;
++
++      if (data[IFLA_IPVLAN_MODE]) {
+               u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
+               err = ipvlan_set_port_mode(port, nmode);
+@@ -530,6 +535,8 @@ static int ipvlan_link_new(struct net *s
+               struct ipvl_dev *tmp = netdev_priv(phy_dev);
+               phy_dev = tmp->phy_dev;
++              if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN))
++                      return -EPERM;
+       } else if (!netif_is_ipvlan_port(phy_dev)) {
+               err = ipvlan_port_create(phy_dev);
+               if (err < 0)
diff --git a/queue-4.9/l2tp-fix-infoleak-in-l2tp_ip6_recvmsg.patch b/queue-4.9/l2tp-fix-infoleak-in-l2tp_ip6_recvmsg.patch
new file mode 100644 (file)
index 0000000..cc212c7
--- /dev/null
@@ -0,0 +1,83 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 12 Mar 2019 06:50:11 -0700
+Subject: l2tp: fix infoleak in l2tp_ip6_recvmsg()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 163d1c3d6f17556ed3c340d3789ea93be95d6c28 ]
+
+Back in 2013 Hannes took care of most of such leaks in commit
+bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls")
+
+But the bug in l2tp_ip6_recvmsg() has not been fixed.
+
+syzbot report :
+
+BUG: KMSAN: kernel-infoleak in _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32
+CPU: 1 PID: 10996 Comm: syz-executor362 Not tainted 5.0.0+ #11
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x173/0x1d0 lib/dump_stack.c:113
+ kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600
+ kmsan_internal_check_memory+0x9f4/0xb10 mm/kmsan/kmsan.c:694
+ kmsan_copy_to_user+0xab/0xc0 mm/kmsan/kmsan_hooks.c:601
+ _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32
+ copy_to_user include/linux/uaccess.h:174 [inline]
+ move_addr_to_user+0x311/0x570 net/socket.c:227
+ ___sys_recvmsg+0xb65/0x1310 net/socket.c:2283
+ do_recvmmsg+0x646/0x10c0 net/socket.c:2390
+ __sys_recvmmsg net/socket.c:2469 [inline]
+ __do_sys_recvmmsg net/socket.c:2492 [inline]
+ __se_sys_recvmmsg+0x1d1/0x350 net/socket.c:2485
+ __x64_sys_recvmmsg+0x62/0x80 net/socket.c:2485
+ do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291
+ entry_SYSCALL_64_after_hwframe+0x63/0xe7
+RIP: 0033:0x445819
+Code: e8 6c b6 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b 12 fc ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007f64453eddb8 EFLAGS: 00000246 ORIG_RAX: 000000000000012b
+RAX: ffffffffffffffda RBX: 00000000006dac28 RCX: 0000000000445819
+RDX: 0000000000000005 RSI: 0000000020002f80 RDI: 0000000000000003
+RBP: 00000000006dac20 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dac2c
+R13: 00007ffeba8f87af R14: 00007f64453ee9c0 R15: 20c49ba5e353f7cf
+
+Local variable description: ----addr@___sys_recvmsg
+Variable was created at:
+ ___sys_recvmsg+0xf6/0x1310 net/socket.c:2244
+ do_recvmmsg+0x646/0x10c0 net/socket.c:2390
+
+Bytes 0-31 of 32 are uninitialized
+Memory access of size 32 starts at ffff8880ae62fbb0
+Data copied to user address 0000000020000000
+
+Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_ip6.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/net/l2tp/l2tp_ip6.c
++++ b/net/l2tp/l2tp_ip6.c
+@@ -680,9 +680,6 @@ static int l2tp_ip6_recvmsg(struct sock
+       if (flags & MSG_OOB)
+               goto out;
+-      if (addr_len)
+-              *addr_len = sizeof(*lsa);
+-
+       if (flags & MSG_ERRQUEUE)
+               return ipv6_recv_error(sk, msg, len, addr_len);
+@@ -712,6 +709,7 @@ static int l2tp_ip6_recvmsg(struct sock
+               lsa->l2tp_conn_id = 0;
+               if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
+                       lsa->l2tp_scope_id = inet6_iif(skb);
++              *addr_len = sizeof(*lsa);
+       }
+       if (np->rxopt.all)
diff --git a/queue-4.9/mdio_bus-fix-use-after-free-on-device_register-fails.patch b/queue-4.9/mdio_bus-fix-use-after-free-on-device_register-fails.patch
new file mode 100644 (file)
index 0000000..a586f88
--- /dev/null
@@ -0,0 +1,125 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: YueHaibing <yuehaibing@huawei.com>
+Date: Thu, 21 Feb 2019 22:42:01 +0800
+Subject: mdio_bus: Fix use-after-free on device_register fails
+
+From: YueHaibing <yuehaibing@huawei.com>
+
+[ Upstream commit 6ff7b060535e87c2ae14dd8548512abfdda528fb ]
+
+KASAN has found a use-after-free in fixed_mdio_bus_init().
+Commit 0c692d07842a ("drivers/net/phy/mdio_bus.c: call
+put_device on device_register() failure") calls put_device()
+when device_register() fails, giving up the last reference
+to the device and allowing mdiobus_release() to be executed,
+kfreeing the bus. However, in most drivers mdiobus_free()
+is called to free the bus when mdiobus_register() fails, so a
+use-after-free occurs when the bus is accessed again. This patch
+reverts that change to let mdiobus_free() free the bus.
+
+KASAN report details as below:
+
+BUG: KASAN: use-after-free in mdiobus_free+0x85/0x90 drivers/net/phy/mdio_bus.c:482
+Read of size 4 at addr ffff8881dc824d78 by task syz-executor.0/3524
+
+CPU: 1 PID: 3524 Comm: syz-executor.0 Not tainted 5.0.0-rc7+ #45
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0xfa/0x1ce lib/dump_stack.c:113
+ print_address_description+0x65/0x270 mm/kasan/report.c:187
+ kasan_report+0x149/0x18d mm/kasan/report.c:317
+ mdiobus_free+0x85/0x90 drivers/net/phy/mdio_bus.c:482
+ fixed_mdio_bus_init+0x283/0x1000 [fixed_phy]
+ ? 0xffffffffc0e40000
+ ? 0xffffffffc0e40000
+ ? 0xffffffffc0e40000
+ do_one_initcall+0xfa/0x5ca init/main.c:887
+ do_init_module+0x204/0x5f6 kernel/module.c:3460
+ load_module+0x66b2/0x8570 kernel/module.c:3808
+ __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902
+ do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x462e99
+Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007f6215c19c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
+RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99
+RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000003
+RBP: 00007f6215c19c70 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007f6215c1a6bc
+R13: 00000000004bcefb R14: 00000000006f7030 R15: 0000000000000004
+
+Allocated by task 3524:
+ set_track mm/kasan/common.c:85 [inline]
+ __kasan_kmalloc.constprop.3+0xa0/0xd0 mm/kasan/common.c:496
+ kmalloc include/linux/slab.h:545 [inline]
+ kzalloc include/linux/slab.h:740 [inline]
+ mdiobus_alloc_size+0x54/0x1b0 drivers/net/phy/mdio_bus.c:143
+ fixed_mdio_bus_init+0x163/0x1000 [fixed_phy]
+ do_one_initcall+0xfa/0x5ca init/main.c:887
+ do_init_module+0x204/0x5f6 kernel/module.c:3460
+ load_module+0x66b2/0x8570 kernel/module.c:3808
+ __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902
+ do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+Freed by task 3524:
+ set_track mm/kasan/common.c:85 [inline]
+ __kasan_slab_free+0x130/0x180 mm/kasan/common.c:458
+ slab_free_hook mm/slub.c:1409 [inline]
+ slab_free_freelist_hook mm/slub.c:1436 [inline]
+ slab_free mm/slub.c:2986 [inline]
+ kfree+0xe1/0x270 mm/slub.c:3938
+ device_release+0x78/0x200 drivers/base/core.c:919
+ kobject_cleanup lib/kobject.c:662 [inline]
+ kobject_release lib/kobject.c:691 [inline]
+ kref_put include/linux/kref.h:67 [inline]
+ kobject_put+0x146/0x240 lib/kobject.c:708
+ put_device+0x1c/0x30 drivers/base/core.c:2060
+ __mdiobus_register+0x483/0x560 drivers/net/phy/mdio_bus.c:382
+ fixed_mdio_bus_init+0x26b/0x1000 [fixed_phy]
+ do_one_initcall+0xfa/0x5ca init/main.c:887
+ do_init_module+0x204/0x5f6 kernel/module.c:3460
+ load_module+0x66b2/0x8570 kernel/module.c:3808
+ __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902
+ do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+The buggy address belongs to the object at ffff8881dc824c80
+ which belongs to the cache kmalloc-2k of size 2048
+The buggy address is located 248 bytes inside of
+ 2048-byte region [ffff8881dc824c80, ffff8881dc825480)
+The buggy address belongs to the page:
+page:ffffea0007720800 count:1 mapcount:0 mapping:ffff8881f6c02800 index:0x0 compound_mapcount: 0
+flags: 0x2fffc0000010200(slab|head)
+raw: 02fffc0000010200 0000000000000000 0000000500000001 ffff8881f6c02800
+raw: 0000000000000000 00000000800f000f 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff8881dc824c00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff8881dc824c80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+>ffff8881dc824d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+                                                                ^
+ ffff8881dc824d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8881dc824e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+
+Fixes: 0c692d07842a ("drivers/net/phy/mdio_bus.c: call put_device on device_register() failure")
+Signed-off-by: YueHaibing <yuehaibing@huawei.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/mdio_bus.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/net/phy/mdio_bus.c
++++ b/drivers/net/phy/mdio_bus.c
+@@ -319,7 +319,6 @@ int __mdiobus_register(struct mii_bus *b
+       err = device_register(&bus->dev);
+       if (err) {
+               pr_err("mii_bus %s failed to register\n", bus->id);
+-              put_device(&bus->dev);
+               return -EINVAL;
+       }
diff --git a/queue-4.9/missing-barriers-in-some-of-unix_sock-addr-and-path-accesses.patch b/queue-4.9/missing-barriers-in-some-of-unix_sock-addr-and-path-accesses.patch
new file mode 100644 (file)
index 0000000..5070130
--- /dev/null
@@ -0,0 +1,213 @@
+From foo@baz Fri Mar 15 20:48:31 PDT 2019
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Fri, 15 Feb 2019 20:09:35 +0000
+Subject: missing barriers in some of unix_sock ->addr and ->path accesses
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit ae3b564179bfd06f32d051b9e5d72ce4b2a07c37 ]
+
+Several u->addr and u->path users are not holding any locks in
+common with unix_bind().  unix_state_lock() is useless for those
+purposes.
+
+u->addr is assign-once and *(u->addr) is fully set up by the time
+we set u->addr (all under unix_table_lock).  u->path is also
+set in the same critical area, also before setting u->addr, and
+any unix_sock with ->path filled will have non-NULL ->addr.
+
+So setting ->addr with smp_store_release() is all we need for those
+"lockless" users - just have them fetch ->addr with smp_load_acquire()
+and don't even bother looking at ->path if they see NULL ->addr.
+
+Users of ->addr and ->path fall into several classes now:
+    1) ones that do smp_load_acquire(u->addr) and access *(u->addr)
+and u->path only if smp_load_acquire() has returned non-NULL.
+    2) places holding unix_table_lock.  These are guaranteed that
+*(u->addr) is seen fully initialized.  If unix_sock is in one of the
+"bound" chains, so's ->path.
+    3) unix_sock_destructor() using ->addr is safe.  All places
+that set u->addr are guaranteed to have seen all stores *(u->addr)
+while holding a reference to u and unix_sock_destructor() is called
+when (atomic) refcount hits zero.
+    4) unix_release_sock() using ->path is safe.  unix_bind()
+is serialized wrt unix_release() (normally - by struct file
+refcount), and for the instances that had ->path set by unix_bind()
+unix_release_sock() comes from unix_release(), so they are fine.
+Instances that had it set in unix_stream_connect() either end up
+attached to a socket (in unix_accept()), in which case the call
+chain to unix_release_sock() and serialization are the same as in
+the previous case, or they never get accept'ed and unix_release_sock()
+is called when the listener is shut down and its queue gets purged.
+In that case the listener's queue lock provides the barriers needed -
+unix_stream_connect() shoves our unix_sock into listener's queue
+under that lock right after having set ->path and eventual
+unix_release_sock() caller picks them from that queue under the
+same lock right before calling unix_release_sock().
+    5) unix_find_other() use of ->path is pointless, but safe -
+it happens with successful lookup by (abstract) name, so ->path.dentry
+is guaranteed to be NULL there.
+
+earlier-variant-reviewed-by: "Paul E. McKenney" <paulmck@linux.ibm.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c   |   48 +++++++++++++++++++++++++++++-------------------
+ net/unix/diag.c      |    3 ++-
+ security/lsm_audit.c |   10 ++++++----
+ 3 files changed, 37 insertions(+), 24 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -891,7 +891,7 @@ retry:
+       addr->hash ^= sk->sk_type;
+       __unix_remove_socket(sk);
+-      u->addr = addr;
++      smp_store_release(&u->addr, addr);
+       __unix_insert_socket(&unix_socket_table[addr->hash], sk);
+       spin_unlock(&unix_table_lock);
+       err = 0;
+@@ -1061,7 +1061,7 @@ static int unix_bind(struct socket *sock
+       err = 0;
+       __unix_remove_socket(sk);
+-      u->addr = addr;
++      smp_store_release(&u->addr, addr);
+       __unix_insert_socket(list, sk);
+ out_unlock:
+@@ -1332,15 +1332,29 @@ restart:
+       RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
+       otheru = unix_sk(other);
+-      /* copy address information from listening to new sock*/
+-      if (otheru->addr) {
+-              atomic_inc(&otheru->addr->refcnt);
+-              newu->addr = otheru->addr;
+-      }
++      /* copy address information from listening to new sock
++       *
++       * The contents of *(otheru->addr) and otheru->path
++       * are seen fully set up here, since we have found
++       * otheru in hash under unix_table_lock.  Insertion
++       * into the hash chain we'd found it in had been done
++       * in an earlier critical area protected by unix_table_lock,
++       * the same one where we'd set *(otheru->addr) contents,
++       * as well as otheru->path and otheru->addr itself.
++       *
++       * Using smp_store_release() here to set newu->addr
++       * is enough to make those stores, as well as stores
++       * to newu->path visible to anyone who gets newu->addr
++       * by smp_load_acquire().  IOW, the same warranties
++       * as for unix_sock instances bound in unix_bind() or
++       * in unix_autobind().
++       */
+       if (otheru->path.dentry) {
+               path_get(&otheru->path);
+               newu->path = otheru->path;
+       }
++      atomic_inc(&otheru->addr->refcnt);
++      smp_store_release(&newu->addr, otheru->addr);
+       /* Set credentials */
+       copy_peercred(sk, other);
+@@ -1453,7 +1467,7 @@ out:
+ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
+ {
+       struct sock *sk = sock->sk;
+-      struct unix_sock *u;
++      struct unix_address *addr;
+       DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
+       int err = 0;
+@@ -1468,19 +1482,15 @@ static int unix_getname(struct socket *s
+               sock_hold(sk);
+       }
+-      u = unix_sk(sk);
+-      unix_state_lock(sk);
+-      if (!u->addr) {
++      addr = smp_load_acquire(&unix_sk(sk)->addr);
++      if (!addr) {
+               sunaddr->sun_family = AF_UNIX;
+               sunaddr->sun_path[0] = 0;
+               *uaddr_len = sizeof(short);
+       } else {
+-              struct unix_address *addr = u->addr;
+-
+               *uaddr_len = addr->len;
+               memcpy(sunaddr, addr->name, *uaddr_len);
+       }
+-      unix_state_unlock(sk);
+       sock_put(sk);
+ out:
+       return err;
+@@ -2094,11 +2104,11 @@ static int unix_seqpacket_recvmsg(struct
+ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
+ {
+-      struct unix_sock *u = unix_sk(sk);
++      struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
+-      if (u->addr) {
+-              msg->msg_namelen = u->addr->len;
+-              memcpy(msg->msg_name, u->addr->name, u->addr->len);
++      if (addr) {
++              msg->msg_namelen = addr->len;
++              memcpy(msg->msg_name, addr->name, addr->len);
+       }
+ }
+@@ -2814,7 +2824,7 @@ static int unix_seq_show(struct seq_file
+                       (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
+                       sock_i_ino(s));
+-              if (u->addr) {
++              if (u->addr) {  // under unix_table_lock here
+                       int i, len;
+                       seq_putc(seq, ' ');
+--- a/net/unix/diag.c
++++ b/net/unix/diag.c
+@@ -10,7 +10,8 @@
+ static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
+ {
+-      struct unix_address *addr = unix_sk(sk)->addr;
++      /* might or might not have unix_table_lock */
++      struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
+       if (!addr)
+               return 0;
+--- a/security/lsm_audit.c
++++ b/security/lsm_audit.c
+@@ -321,6 +321,7 @@ static void dump_common_audit_data(struc
+               if (a->u.net->sk) {
+                       struct sock *sk = a->u.net->sk;
+                       struct unix_sock *u;
++                      struct unix_address *addr;
+                       int len = 0;
+                       char *p = NULL;
+@@ -351,14 +352,15 @@ static void dump_common_audit_data(struc
+ #endif
+                       case AF_UNIX:
+                               u = unix_sk(sk);
++                              addr = smp_load_acquire(&u->addr);
++                              if (!addr)
++                                      break;
+                               if (u->path.dentry) {
+                                       audit_log_d_path(ab, " path=", &u->path);
+                                       break;
+                               }
+-                              if (!u->addr)
+-                                      break;
+-                              len = u->addr->len-sizeof(short);
+-                              p = &u->addr->name->sun_path[0];
++                              len = addr->len-sizeof(short);
++                              p = &addr->name->sun_path[0];
+                               audit_log_format(ab, " path=");
+                               if (*p)
+                                       audit_log_untrustedstring(ab, p);
diff --git a/queue-4.9/net-hsr-fix-memory-leak-in-hsr_dev_finalize.patch b/queue-4.9/net-hsr-fix-memory-leak-in-hsr_dev_finalize.patch
new file mode 100644 (file)
index 0000000..a7d82da
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Mao Wenan <maowenan@huawei.com>
+Date: Wed, 6 Mar 2019 22:45:01 +0800
+Subject: net: hsr: fix memory leak in hsr_dev_finalize()
+
+From: Mao Wenan <maowenan@huawei.com>
+
+[ Upstream commit 3dc6da493a29dbeda9f13b637bd9c02c414b2261 ]
+
+If hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER) fails to add the
+port, hsr_dev_finalize() returns res directly and forgets to free
+the node that was allocated in hsr_create_self_node(), and forgets
+to delete the node->mac_list linked in hsr->self_node_db.
+
+BUG: memory leak
+unreferenced object 0xffff8881cfa0c780 (size 64):
+  comm "syz-executor.0", pid 2077, jiffies 4294717969 (age 2415.377s)
+  hex dump (first 32 bytes):
+    e0 c7 a0 cf 81 88 ff ff 00 02 00 00 00 00 ad de  ................
+    00 e6 49 cd 81 88 ff ff c0 9b 87 d0 81 88 ff ff  ..I.............
+  backtrace:
+    [<00000000e2ff5070>] hsr_dev_finalize+0x736/0x960 [hsr]
+    [<000000003ed2e597>] hsr_newlink+0x2b2/0x3e0 [hsr]
+    [<000000003fa8c6b6>] __rtnl_newlink+0xf1f/0x1600 net/core/rtnetlink.c:3182
+    [<000000001247a7ad>] rtnl_newlink+0x66/0x90 net/core/rtnetlink.c:3240
+    [<00000000e7d1b61d>] rtnetlink_rcv_msg+0x54e/0xb90 net/core/rtnetlink.c:5130
+    [<000000005556bd3a>] netlink_rcv_skb+0x129/0x340 net/netlink/af_netlink.c:2477
+    [<00000000741d5ee6>] netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
+    [<00000000741d5ee6>] netlink_unicast+0x49a/0x650 net/netlink/af_netlink.c:1336
+    [<000000009d56f9b7>] netlink_sendmsg+0x88b/0xdf0 net/netlink/af_netlink.c:1917
+    [<0000000046b35c59>] sock_sendmsg_nosec net/socket.c:621 [inline]
+    [<0000000046b35c59>] sock_sendmsg+0xc3/0x100 net/socket.c:631
+    [<00000000d208adc9>] __sys_sendto+0x33e/0x560 net/socket.c:1786
+    [<00000000b582837a>] __do_sys_sendto net/socket.c:1798 [inline]
+    [<00000000b582837a>] __se_sys_sendto net/socket.c:1794 [inline]
+    [<00000000b582837a>] __x64_sys_sendto+0xdd/0x1b0 net/socket.c:1794
+    [<00000000c866801d>] do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290
+    [<00000000fea382d9>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+    [<00000000e01dacb3>] 0xffffffffffffffff
+
+Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Mao Wenan <maowenan@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/hsr/hsr_device.c   |    4 +++-
+ net/hsr/hsr_framereg.c |   12 ++++++++++++
+ net/hsr/hsr_framereg.h |    1 +
+ 3 files changed, 16 insertions(+), 1 deletion(-)
+
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -485,7 +485,7 @@ int hsr_dev_finalize(struct net_device *
+       res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
+       if (res)
+-              return res;
++              goto err_add_port;
+       res = register_netdevice(hsr_dev);
+       if (res)
+@@ -505,6 +505,8 @@ int hsr_dev_finalize(struct net_device *
+ fail:
+       hsr_for_each_port(hsr, port)
+               hsr_del_port(port);
++err_add_port:
++      hsr_del_node(&hsr->self_node_db);
+       return res;
+ }
+--- a/net/hsr/hsr_framereg.c
++++ b/net/hsr/hsr_framereg.c
+@@ -124,6 +124,18 @@ int hsr_create_self_node(struct list_hea
+       return 0;
+ }
++void hsr_del_node(struct list_head *self_node_db)
++{
++      struct hsr_node *node;
++
++      rcu_read_lock();
++      node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
++      rcu_read_unlock();
++      if (node) {
++              list_del_rcu(&node->mac_list);
++              kfree(node);
++      }
++}
+ /* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA;
+  * seq_out is used to initialize filtering of outgoing duplicate frames
+--- a/net/hsr/hsr_framereg.h
++++ b/net/hsr/hsr_framereg.h
+@@ -16,6 +16,7 @@
+ struct hsr_node;
++void hsr_del_node(struct list_head *self_node_db);
+ struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
+                             u16 seq_out);
+ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
diff --git a/queue-4.9/net-hsr-fix-possible-crash-in-add_timer.patch b/queue-4.9/net-hsr-fix-possible-crash-in-add_timer.patch
new file mode 100644 (file)
index 0000000..48993f9
--- /dev/null
@@ -0,0 +1,135 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 7 Mar 2019 09:36:33 -0800
+Subject: net/hsr: fix possible crash in add_timer()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 1e027960edfaa6a43f9ca31081729b716598112b ]
+
+syzbot found another add_timer() issue, this time in net/hsr [1]
+
+Let's use mod_timer() which is safe.
+
+[1]
+kernel BUG at kernel/time/timer.c:1136!
+invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+CPU: 0 PID: 15909 Comm: syz-executor.3 Not tainted 5.0.0+ #97
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+kobject: 'loop2' (00000000f5629718): kobject_uevent_env
+RIP: 0010:add_timer kernel/time/timer.c:1136 [inline]
+RIP: 0010:add_timer+0x654/0xbe0 kernel/time/timer.c:1134
+Code: 0f 94 c5 31 ff 44 89 ee e8 09 61 0f 00 45 84 ed 0f 84 77 fd ff ff e8 bb 5f 0f 00 e8 07 10 a0 ff e9 68 fd ff ff e8 ac 5f 0f 00 <0f> 0b e8 a5 5f 0f 00 0f 0b e8 9e 5f 0f 00 4c 89 b5 58 ff ff ff e9
+RSP: 0018:ffff8880656eeca0 EFLAGS: 00010246
+kobject: 'loop2' (00000000f5629718): fill_kobj_path: path = '/devices/virtual/block/loop2'
+RAX: 0000000000040000 RBX: 1ffff1100caddd9a RCX: ffffc9000c436000
+RDX: 0000000000040000 RSI: ffffffff816056c4 RDI: ffff88806a2f6cc8
+RBP: ffff8880656eed58 R08: ffff888067f4a300 R09: ffff888067f4abc8
+R10: 0000000000000000 R11: 0000000000000000 R12: ffff88806a2f6cc0
+R13: dffffc0000000000 R14: 0000000000000001 R15: ffff8880656eed30
+FS:  00007fc2019bf700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000738000 CR3: 0000000067e8e000 CR4: 00000000001406f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ hsr_check_announce net/hsr/hsr_device.c:99 [inline]
+ hsr_check_carrier_and_operstate+0x567/0x6f0 net/hsr/hsr_device.c:120
+ hsr_netdev_notify+0x297/0xa00 net/hsr/hsr_main.c:51
+ notifier_call_chain+0xc7/0x240 kernel/notifier.c:93
+ __raw_notifier_call_chain kernel/notifier.c:394 [inline]
+ raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401
+ call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739
+ call_netdevice_notifiers_extack net/core/dev.c:1751 [inline]
+ call_netdevice_notifiers net/core/dev.c:1765 [inline]
+ dev_open net/core/dev.c:1436 [inline]
+ dev_open+0x143/0x160 net/core/dev.c:1424
+ team_port_add drivers/net/team/team.c:1203 [inline]
+ team_add_slave+0xa07/0x15d0 drivers/net/team/team.c:1933
+ do_set_master net/core/rtnetlink.c:2358 [inline]
+ do_set_master+0x1d4/0x230 net/core/rtnetlink.c:2332
+ do_setlink+0x966/0x3510 net/core/rtnetlink.c:2493
+ rtnl_setlink+0x271/0x3b0 net/core/rtnetlink.c:2747
+ rtnetlink_rcv_msg+0x465/0xb00 net/core/rtnetlink.c:5192
+ netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485
+ rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5210
+ netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
+ netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336
+ netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925
+ sock_sendmsg_nosec net/socket.c:622 [inline]
+ sock_sendmsg+0xdd/0x130 net/socket.c:632
+ sock_write_iter+0x27c/0x3e0 net/socket.c:923
+ call_write_iter include/linux/fs.h:1869 [inline]
+ do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680
+ do_iter_write fs/read_write.c:956 [inline]
+ do_iter_write+0x184/0x610 fs/read_write.c:937
+ vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001
+ do_writev+0xf6/0x290 fs/read_write.c:1036
+ __do_sys_writev fs/read_write.c:1109 [inline]
+ __se_sys_writev fs/read_write.c:1106 [inline]
+ __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106
+ do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x457f29
+Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007fc2019bec78 EFLAGS: 00000246 ORIG_RAX: 0000000000000014
+RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457f29
+RDX: 0000000000000001 RSI: 00000000200000c0 RDI: 0000000000000003
+RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc2019bf6d4
+R13: 00000000004c4a60 R14: 00000000004dd218 R15: 00000000ffffffff
+
+Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Cc: Arvid Brodin <arvid.brodin@alten.se>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/hsr/hsr_device.c |   14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -94,9 +94,8 @@ static void hsr_check_announce(struct ne
+                       && (old_operstate != IF_OPER_UP)) {
+               /* Went up */
+               hsr->announce_count = 0;
+-              hsr->announce_timer.expires = jiffies +
+-                              msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+-              add_timer(&hsr->announce_timer);
++              mod_timer(&hsr->announce_timer,
++                        jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
+       }
+       if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP))
+@@ -331,6 +330,7 @@ static void hsr_announce(unsigned long d
+ {
+       struct hsr_priv *hsr;
+       struct hsr_port *master;
++      unsigned long interval;
+       hsr = (struct hsr_priv *) data;
+@@ -342,18 +342,16 @@ static void hsr_announce(unsigned long d
+                               hsr->protVersion);
+               hsr->announce_count++;
+-              hsr->announce_timer.expires = jiffies +
+-                              msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
++              interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+       } else {
+               send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK,
+                               hsr->protVersion);
+-              hsr->announce_timer.expires = jiffies +
+-                              msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
++              interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+       }
+       if (is_admin_up(master->dev))
+-              add_timer(&hsr->announce_timer);
++              mod_timer(&hsr->announce_timer, jiffies + interval);
+       rcu_read_unlock();
+ }
diff --git a/queue-4.9/net-mlx4_core-fix-locking-in-sriov-mode-when-switching-between-events-and-polling.patch b/queue-4.9/net-mlx4_core-fix-locking-in-sriov-mode-when-switching-between-events-and-polling.patch
new file mode 100644 (file)
index 0000000..c895f50
--- /dev/null
@@ -0,0 +1,72 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Tue, 12 Mar 2019 17:05:48 +0200
+Subject: net/mlx4_core: Fix locking in SRIOV mode when switching between events and polling
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+[ Upstream commit c07d27927f2f2e96fcd27bb9fb330c9ea65612d0 ]
+
+In procedures mlx4_cmd_use_events() and mlx4_cmd_use_polling(), we need to
+guarantee that there are no FW commands in progress on the comm channel
+(for VFs) or wrapped FW commands (on the PF) when SRIOV is active.
+
+We do this by also taking the slave_cmd_mutex when SRIOV is active.
+
+This is especially important when switching from event to polling, since we
+free the command-context array during the switch.  If there are FW commands
+in progress (e.g., waiting for a completion event), the completion event
+handler will access freed memory.
+
+Since the decision to use comm_wait or comm_poll is taken before grabbing
+the event_sem/poll_sem in mlx4_comm_cmd_wait/poll, we must take the
+slave_cmd_mutex as well (to guarantee that the decision to use events or
+polling and the call to the appropriate cmd function are atomic).
+
+Fixes: a7e1f04905e5 ("net/mlx4_core: Fix deadlock when switching between polling and event fw commands")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/cmd.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
+@@ -2633,6 +2633,8 @@ int mlx4_cmd_use_events(struct mlx4_dev
+       if (!priv->cmd.context)
+               return -ENOMEM;
++      if (mlx4_is_mfunc(dev))
++              mutex_lock(&priv->cmd.slave_cmd_mutex);
+       down_write(&priv->cmd.switch_sem);
+       for (i = 0; i < priv->cmd.max_cmds; ++i) {
+               priv->cmd.context[i].token = i;
+@@ -2658,6 +2660,8 @@ int mlx4_cmd_use_events(struct mlx4_dev
+       down(&priv->cmd.poll_sem);
+       priv->cmd.use_events = 1;
+       up_write(&priv->cmd.switch_sem);
++      if (mlx4_is_mfunc(dev))
++              mutex_unlock(&priv->cmd.slave_cmd_mutex);
+       return err;
+ }
+@@ -2670,6 +2674,8 @@ void mlx4_cmd_use_polling(struct mlx4_de
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int i;
++      if (mlx4_is_mfunc(dev))
++              mutex_lock(&priv->cmd.slave_cmd_mutex);
+       down_write(&priv->cmd.switch_sem);
+       priv->cmd.use_events = 0;
+@@ -2681,6 +2687,8 @@ void mlx4_cmd_use_polling(struct mlx4_de
+       up(&priv->cmd.poll_sem);
+       up_write(&priv->cmd.switch_sem);
++      if (mlx4_is_mfunc(dev))
++              mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ }
+ struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
diff --git a/queue-4.9/net-mlx4_core-fix-qp-mtt-size-calculation.patch b/queue-4.9/net-mlx4_core-fix-qp-mtt-size-calculation.patch
new file mode 100644 (file)
index 0000000..4220e01
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Tue, 12 Mar 2019 17:05:49 +0200
+Subject: net/mlx4_core: Fix qp mtt size calculation
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+[ Upstream commit 8511a653e9250ef36b95803c375a7be0e2edb628 ]
+
+Calculation of qp mtt size (in function mlx4_RST2INIT_wrapper)
+ultimately depends on function roundup_pow_of_two.
+
+If the amount of memory required by the QP is less than one page,
+roundup_pow_of_two is called with argument zero.  In this case, the
+roundup_pow_of_two result is undefined.
+
+Calling roundup_pow_of_two with a zero argument resulted in the
+following stack trace:
+
+UBSAN: Undefined behaviour in ./include/linux/log2.h:61:13
+shift exponent 64 is too large for 64-bit type 'long unsigned int'
+CPU: 4 PID: 26939 Comm: rping Tainted: G OE 4.19.0-rc1
+Hardware name: Supermicro X9DR3-F/X9DR3-F, BIOS 3.2a 07/09/2015
+Call Trace:
+dump_stack+0x9a/0xeb
+ubsan_epilogue+0x9/0x7c
+__ubsan_handle_shift_out_of_bounds+0x254/0x29d
+? __ubsan_handle_load_invalid_value+0x180/0x180
+? debug_show_all_locks+0x310/0x310
+? sched_clock+0x5/0x10
+? sched_clock+0x5/0x10
+? sched_clock_cpu+0x18/0x260
+? find_held_lock+0x35/0x1e0
+? mlx4_RST2INIT_QP_wrapper+0xfb1/0x1440 [mlx4_core]
+mlx4_RST2INIT_QP_wrapper+0xfb1/0x1440 [mlx4_core]
+
+Fix this by explicitly testing for zero, and returning one if the
+argument is zero (assuming that the next higher power of 2 in this case
+should be one).
+
+Fixes: c82e9aa0a8bc ("mlx4_core: resource tracking for HCA resources used by guests")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/resource_tracker.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+@@ -2677,13 +2677,13 @@ static int qp_get_mtt_size(struct mlx4_q
+       int total_pages;
+       int total_mem;
+       int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f;
++      int tot;
+       sq_size = 1 << (log_sq_size + log_sq_sride + 4);
+       rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4));
+       total_mem = sq_size + rq_size;
+-      total_pages =
+-              roundup_pow_of_two((total_mem + (page_offset << 6)) >>
+-                                 page_shift);
++      tot = (total_mem + (page_offset << 6)) >> page_shift;
++      total_pages = !tot ? 1 : roundup_pow_of_two(tot);
+       return total_pages;
+ }
diff --git a/queue-4.9/net-mlx4_core-fix-reset-flow-when-in-command-polling-mode.patch b/queue-4.9/net-mlx4_core-fix-reset-flow-when-in-command-polling-mode.patch
new file mode 100644 (file)
index 0000000..0ac8f5b
--- /dev/null
@@ -0,0 +1,85 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Tue, 12 Mar 2019 17:05:47 +0200
+Subject: net/mlx4_core: Fix reset flow when in command polling mode
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+[ Upstream commit e15ce4b8d11227007577e6dc1364d288b8874fbe ]
+
+As part of unloading a device, the driver switches from
+FW command event mode to FW command polling mode.
+
+Part of switching over to polling mode is freeing the command context array
+memory (unfortunately, currently, without NULLing the command context array
+pointer).
+
+The reset flow calls "complete" to complete all outstanding fw commands
+(if we are in event mode). The check for event vs. polling mode here
+is to test if the command context array pointer is NULL.
+
+If the reset flow is activated after the switch to polling mode, it will
+attempt (incorrectly) to complete all the commands in the context array --
+because the pointer was not NULLed when the driver switched over to polling
+mode.
+
+As a result, we have a use-after-free situation, which results in a
+kernel crash.
+
+For example:
+BUG: unable to handle kernel NULL pointer dereference at           (null)
+IP: [<ffffffff876c4a8e>] __wake_up_common+0x2e/0x90
+PGD 0
+Oops: 0000 [#1] SMP
+Modules linked in: netconsole nfsv3 nfs_acl nfs lockd grace ...
+CPU: 2 PID: 940 Comm: kworker/2:3 Kdump: loaded Not tainted 3.10.0-862.el7.x86_64 #1
+Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006  04/28/2016
+Workqueue: events hv_eject_device_work [pci_hyperv]
+task: ffff8d1734ca0fd0 ti: ffff8d17354bc000 task.ti: ffff8d17354bc000
+RIP: 0010:[<ffffffff876c4a8e>]  [<ffffffff876c4a8e>] __wake_up_common+0x2e/0x90
+RSP: 0018:ffff8d17354bfa38  EFLAGS: 00010082
+RAX: 0000000000000000 RBX: ffff8d17362d42c8 RCX: 0000000000000000
+RDX: 0000000000000001 RSI: 0000000000000003 RDI: ffff8d17362d42c8
+RBP: ffff8d17354bfa70 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000298 R11: ffff8d173610e000 R12: ffff8d17362d42d0
+R13: 0000000000000246 R14: 0000000000000000 R15: 0000000000000003
+FS:  0000000000000000(0000) GS:ffff8d1802680000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000000 CR3: 00000000f16d8000 CR4: 00000000001406e0
+Call Trace:
+ [<ffffffff876c7adc>] complete+0x3c/0x50
+ [<ffffffffc04242f0>] mlx4_cmd_wake_completions+0x70/0x90 [mlx4_core]
+ [<ffffffffc041e7b1>] mlx4_enter_error_state+0xe1/0x380 [mlx4_core]
+ [<ffffffffc041fa4b>] mlx4_comm_cmd+0x29b/0x360 [mlx4_core]
+ [<ffffffffc041ff51>] __mlx4_cmd+0x441/0x920 [mlx4_core]
+ [<ffffffff877f62b1>] ? __slab_free+0x81/0x2f0
+ [<ffffffff87951384>] ? __radix_tree_lookup+0x84/0xf0
+ [<ffffffffc043a8eb>] mlx4_free_mtt_range+0x5b/0xb0 [mlx4_core]
+ [<ffffffffc043a957>] mlx4_mtt_cleanup+0x17/0x20 [mlx4_core]
+ [<ffffffffc04272c7>] mlx4_free_eq+0xa7/0x1c0 [mlx4_core]
+ [<ffffffffc042803e>] mlx4_cleanup_eq_table+0xde/0x130 [mlx4_core]
+ [<ffffffffc0433e08>] mlx4_unload_one+0x118/0x300 [mlx4_core]
+ [<ffffffffc0434191>] mlx4_remove_one+0x91/0x1f0 [mlx4_core]
+
+The fix is to set the command context array pointer to NULL after freeing
+the array.
+
+Fixes: f5aef5aa3506 ("net/mlx4_core: Activate reset flow upon fatal command cases")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/cmd.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
+@@ -2677,6 +2677,7 @@ void mlx4_cmd_use_polling(struct mlx4_de
+               down(&priv->cmd.event_sem);
+       kfree(priv->cmd.context);
++      priv->cmd.context = NULL;
+       up(&priv->cmd.poll_sem);
+       up_write(&priv->cmd.switch_sem);
diff --git a/queue-4.9/net-set-rtm_table-to-rt_table_compat-for-ipv6-for-tables-255.patch b/queue-4.9/net-set-rtm_table-to-rt_table_compat-for-ipv6-for-tables-255.patch
new file mode 100644 (file)
index 0000000..4ed783e
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Kalash Nainwal <kalash@arista.com>
+Date: Wed, 20 Feb 2019 16:23:04 -0800
+Subject: net: Set rtm_table to RT_TABLE_COMPAT for ipv6 for tables > 255
+
+From: Kalash Nainwal <kalash@arista.com>
+
+[ Upstream commit 97f0082a0592212fc15d4680f5a4d80f79a1687c ]
+
+Set rtm_table to RT_TABLE_COMPAT for ipv6 for tables > 255 to
+keep legacy software happy. This is similar to what was done for
+ipv4 in commit 709772e6e065 ("net: Fix routing tables with
+id > 255 for legacy software").
+
+Signed-off-by: Kalash Nainwal <kalash@arista.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -3211,7 +3211,7 @@ static int rt6_fill_node(struct net *net
+               table = rt->rt6i_table->tb6_id;
+       else
+               table = RT6_TABLE_UNSPEC;
+-      rtm->rtm_table = table;
++      rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
+       if (nla_put_u32(skb, RTA_TABLE, table))
+               goto nla_put_failure;
+       if (rt->rt6i_flags & RTF_REJECT) {
diff --git a/queue-4.9/net-sit-fix-ubsan-undefined-behaviour-in-check_6rd.patch b/queue-4.9/net-sit-fix-ubsan-undefined-behaviour-in-check_6rd.patch
new file mode 100644 (file)
index 0000000..00bdcca
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Mon, 11 Mar 2019 16:29:32 +0800
+Subject: net: sit: fix UBSAN Undefined behaviour in check_6rd
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit a843dc4ebaecd15fca1f4d35a97210f72ea1473b ]
+
+In function check_6rd(), tunnel->ip6rd.relay_prefixlen may be equal
+to 32, so UBSAN complains about the resulting out-of-range shift.
+
+UBSAN: Undefined behaviour in net/ipv6/sit.c:781:47
+shift exponent 32 is too large for 32-bit type 'unsigned int'
+CPU: 6 PID: 20036 Comm: syz-executor.0 Not tainted 4.19.27 #2
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1
+04/01/2014
+Call Trace:
+__dump_stack lib/dump_stack.c:77 [inline]
+dump_stack+0xca/0x13e lib/dump_stack.c:113
+ubsan_epilogue+0xe/0x81 lib/ubsan.c:159
+__ubsan_handle_shift_out_of_bounds+0x293/0x2e8 lib/ubsan.c:425
+check_6rd.constprop.9+0x433/0x4e0 net/ipv6/sit.c:781
+try_6rd net/ipv6/sit.c:806 [inline]
+ipip6_tunnel_xmit net/ipv6/sit.c:866 [inline]
+sit_tunnel_xmit+0x141c/0x2720 net/ipv6/sit.c:1033
+__netdev_start_xmit include/linux/netdevice.h:4300 [inline]
+netdev_start_xmit include/linux/netdevice.h:4309 [inline]
+xmit_one net/core/dev.c:3243 [inline]
+dev_hard_start_xmit+0x17c/0x780 net/core/dev.c:3259
+__dev_queue_xmit+0x1656/0x2500 net/core/dev.c:3829
+neigh_output include/net/neighbour.h:501 [inline]
+ip6_finish_output2+0xa36/0x2290 net/ipv6/ip6_output.c:120
+ip6_finish_output+0x3e7/0xa20 net/ipv6/ip6_output.c:154
+NF_HOOK_COND include/linux/netfilter.h:278 [inline]
+ip6_output+0x1e2/0x720 net/ipv6/ip6_output.c:171
+dst_output include/net/dst.h:444 [inline]
+ip6_local_out+0x99/0x170 net/ipv6/output_core.c:176
+ip6_send_skb+0x9d/0x2f0 net/ipv6/ip6_output.c:1697
+ip6_push_pending_frames+0xc0/0x100 net/ipv6/ip6_output.c:1717
+rawv6_push_pending_frames net/ipv6/raw.c:616 [inline]
+rawv6_sendmsg+0x2435/0x3530 net/ipv6/raw.c:946
+inet_sendmsg+0xf8/0x5c0 net/ipv4/af_inet.c:798
+sock_sendmsg_nosec net/socket.c:621 [inline]
+sock_sendmsg+0xc8/0x110 net/socket.c:631
+___sys_sendmsg+0x6cf/0x890 net/socket.c:2114
+__sys_sendmsg+0xf0/0x1b0 net/socket.c:2152
+do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:290
+entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+Signed-off-by: linmiaohe <linmiaohe@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/sit.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/sit.c
++++ b/net/ipv6/sit.c
+@@ -767,8 +767,9 @@ static bool check_6rd(struct ip_tunnel *
+               pbw0 = tunnel->ip6rd.prefixlen >> 5;
+               pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
+-              d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
+-                  tunnel->ip6rd.relay_prefixlen;
++              d = tunnel->ip6rd.relay_prefixlen < 32 ?
++                      (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
++                  tunnel->ip6rd.relay_prefixlen : 0;
+               pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
+               if (pbi1 > 0)
diff --git a/queue-4.9/net-x25-fix-a-race-in-x25_bind.patch b/queue-4.9/net-x25-fix-a-race-in-x25_bind.patch
new file mode 100644 (file)
index 0000000..919c002
--- /dev/null
@@ -0,0 +1,133 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 23 Feb 2019 13:24:59 -0800
+Subject: net/x25: fix a race in x25_bind()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 797a22bd5298c2674d927893f46cadf619dad11d ]
+
+syzbot was able to trigger another soft lockup [1]
+
+I first thought it was the O(N^2) issue I mentioned in my
+prior fix (f657d22ee1f "net/x25: do not hold the cpu
+too long in x25_new_lci()"), but I eventually found
+that x25_bind() was not checking SOCK_ZAPPED state under
+socket lock protection.
+
+This means that multiple threads can end up calling
+x25_insert_socket() for the same socket, and corrupt x25_list.
+
+[1]
+watchdog: BUG: soft lockup - CPU#0 stuck for 123s! [syz-executor.2:10492]
+Modules linked in:
+irq event stamp: 27515
+hardirqs last  enabled at (27514): [<ffffffff81006673>] trace_hardirqs_on_thunk+0x1a/0x1c
+hardirqs last disabled at (27515): [<ffffffff8100668f>] trace_hardirqs_off_thunk+0x1a/0x1c
+softirqs last  enabled at (32): [<ffffffff8632ee73>] x25_get_neigh+0xa3/0xd0 net/x25/x25_link.c:336
+softirqs last disabled at (34): [<ffffffff86324bc3>] x25_find_socket+0x23/0x140 net/x25/af_x25.c:341
+CPU: 0 PID: 10492 Comm: syz-executor.2 Not tainted 5.0.0-rc7+ #88
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:__sanitizer_cov_trace_pc+0x4/0x50 kernel/kcov.c:97
+Code: f4 ff ff ff e8 11 9f ea ff 48 c7 05 12 fb e5 08 00 00 00 00 e9 c8 e9 ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 <48> 8b 75 08 65 48 8b 04 25 40 ee 01 00 65 8b 15 38 0c 92 7e 81 e2
+RSP: 0018:ffff88806e94fc48 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13
+RAX: 1ffff1100d84dac5 RBX: 0000000000000001 RCX: ffffc90006197000
+RDX: 0000000000040000 RSI: ffffffff86324bf3 RDI: ffff88806c26d628
+RBP: ffff88806e94fc48 R08: ffff88806c1c6500 R09: fffffbfff1282561
+R10: fffffbfff1282560 R11: ffffffff89412b03 R12: ffff88806c26d628
+R13: ffff888090455200 R14: dffffc0000000000 R15: 0000000000000000
+FS:  00007f3a107e4700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f3a107e3db8 CR3: 00000000a5544000 CR4: 00000000001406f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ __x25_find_socket net/x25/af_x25.c:327 [inline]
+ x25_find_socket+0x7d/0x140 net/x25/af_x25.c:342
+ x25_new_lci net/x25/af_x25.c:355 [inline]
+ x25_connect+0x380/0xde0 net/x25/af_x25.c:784
+ __sys_connect+0x266/0x330 net/socket.c:1662
+ __do_sys_connect net/socket.c:1673 [inline]
+ __se_sys_connect net/socket.c:1670 [inline]
+ __x64_sys_connect+0x73/0xb0 net/socket.c:1670
+ do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x457e29
+Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007f3a107e3c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a
+RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457e29
+RDX: 0000000000000012 RSI: 0000000020000200 RDI: 0000000000000005
+RBP: 000000000073c040 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007f3a107e46d4
+R13: 00000000004be362 R14: 00000000004ceb98 R15: 00000000ffffffff
+Sending NMI from CPU 0 to CPUs 1:
+NMI backtrace for cpu 1
+CPU: 1 PID: 10493 Comm: syz-executor.3 Not tainted 5.0.0-rc7+ #88
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:__read_once_size include/linux/compiler.h:193 [inline]
+RIP: 0010:queued_write_lock_slowpath+0x143/0x290 kernel/locking/qrwlock.c:86
+Code: 4c 8d 2c 01 41 83 c7 03 41 0f b6 45 00 41 38 c7 7c 08 84 c0 0f 85 0c 01 00 00 8b 03 3d 00 01 00 00 74 1a f3 90 41 0f b6 55 00 <41> 38 d7 7c eb 84 d2 74 e7 48 89 df e8 cc aa 4e 00 eb dd be 04 00
+RSP: 0018:ffff888085c47bd8 EFLAGS: 00000206
+RAX: 0000000000000300 RBX: ffffffff89412b00 RCX: 1ffffffff1282560
+RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff89412b00
+RBP: ffff888085c47c70 R08: 1ffffffff1282560 R09: fffffbfff1282561
+R10: fffffbfff1282560 R11: ffffffff89412b03 R12: 00000000000000ff
+R13: fffffbfff1282560 R14: 1ffff11010b88f7d R15: 0000000000000003
+FS:  00007fdd04086700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fdd04064db8 CR3: 0000000090be0000 CR4: 00000000001406e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ queued_write_lock include/asm-generic/qrwlock.h:104 [inline]
+ do_raw_write_lock+0x1d6/0x290 kernel/locking/spinlock_debug.c:203
+ __raw_write_lock_bh include/linux/rwlock_api_smp.h:204 [inline]
+ _raw_write_lock_bh+0x3b/0x50 kernel/locking/spinlock.c:312
+ x25_insert_socket+0x21/0xe0 net/x25/af_x25.c:267
+ x25_bind+0x273/0x340 net/x25/af_x25.c:703
+ __sys_bind+0x23f/0x290 net/socket.c:1481
+ __do_sys_bind net/socket.c:1492 [inline]
+ __se_sys_bind net/socket.c:1490 [inline]
+ __x64_sys_bind+0x73/0xb0 net/socket.c:1490
+ do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x457e29
+
+Fixes: 90c27297a9bf ("X.25 remove bkl in bind")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: andrew hendry <andrew.hendry@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/x25/af_x25.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/net/x25/af_x25.c
++++ b/net/x25/af_x25.c
+@@ -678,8 +678,7 @@ static int x25_bind(struct socket *sock,
+       struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
+       int len, i, rc = 0;
+-      if (!sock_flag(sk, SOCK_ZAPPED) ||
+-          addr_len != sizeof(struct sockaddr_x25) ||
++      if (addr_len != sizeof(struct sockaddr_x25) ||
+           addr->sx25_family != AF_X25) {
+               rc = -EINVAL;
+               goto out;
+@@ -694,9 +693,13 @@ static int x25_bind(struct socket *sock,
+       }
+       lock_sock(sk);
+-      x25_sk(sk)->source_addr = addr->sx25_addr;
+-      x25_insert_socket(sk);
+-      sock_reset_flag(sk, SOCK_ZAPPED);
++      if (sock_flag(sk, SOCK_ZAPPED)) {
++              x25_sk(sk)->source_addr = addr->sx25_addr;
++              x25_insert_socket(sk);
++              sock_reset_flag(sk, SOCK_ZAPPED);
++      } else {
++              rc = -EINVAL;
++      }
+       release_sock(sk);
+       SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
+ out:
diff --git a/queue-4.9/net-x25-fix-use-after-free-in-x25_device_event.patch b/queue-4.9/net-x25-fix-use-after-free-in-x25_device_event.patch
new file mode 100644 (file)
index 0000000..33b857a
--- /dev/null
@@ -0,0 +1,145 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 10 Mar 2019 09:07:14 -0700
+Subject: net/x25: fix use-after-free in x25_device_event()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 95d6ebd53c79522bf9502dbc7e89e0d63f94dae4 ]
+
+In case of failure x25_connect() does a x25_neigh_put(x25->neighbour)
+but forgets to clear x25->neighbour pointer, thus triggering use-after-free.
+
+Since the socket is visible in x25_list, we need to hold x25_list_lock
+to protect the operation.
+
+syzbot report :
+
+BUG: KASAN: use-after-free in x25_kill_by_device net/x25/af_x25.c:217 [inline]
+BUG: KASAN: use-after-free in x25_device_event+0x296/0x2b0 net/x25/af_x25.c:252
+Read of size 8 at addr ffff8880a030edd0 by task syz-executor003/7854
+
+CPU: 0 PID: 7854 Comm: syz-executor003 Not tainted 5.0.0+ #97
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x172/0x1f0 lib/dump_stack.c:113
+ print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187
+ kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317
+ __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135
+ x25_kill_by_device net/x25/af_x25.c:217 [inline]
+ x25_device_event+0x296/0x2b0 net/x25/af_x25.c:252
+ notifier_call_chain+0xc7/0x240 kernel/notifier.c:93
+ __raw_notifier_call_chain kernel/notifier.c:394 [inline]
+ raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401
+ call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739
+ call_netdevice_notifiers_extack net/core/dev.c:1751 [inline]
+ call_netdevice_notifiers net/core/dev.c:1765 [inline]
+ __dev_notify_flags+0x1e9/0x2c0 net/core/dev.c:7607
+ dev_change_flags+0x10d/0x170 net/core/dev.c:7643
+ dev_ifsioc+0x2b0/0x940 net/core/dev_ioctl.c:237
+ dev_ioctl+0x1b8/0xc70 net/core/dev_ioctl.c:488
+ sock_do_ioctl+0x1bd/0x300 net/socket.c:995
+ sock_ioctl+0x32b/0x610 net/socket.c:1096
+ vfs_ioctl fs/ioctl.c:46 [inline]
+ file_ioctl fs/ioctl.c:509 [inline]
+ do_vfs_ioctl+0xd6e/0x1390 fs/ioctl.c:696
+ ksys_ioctl+0xab/0xd0 fs/ioctl.c:713
+ __do_sys_ioctl fs/ioctl.c:720 [inline]
+ __se_sys_ioctl fs/ioctl.c:718 [inline]
+ __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718
+ do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x4467c9
+Code: e8 0c e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 5b 07 fc ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007fdbea222d98 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+RAX: ffffffffffffffda RBX: 00000000006dbc58 RCX: 00000000004467c9
+RDX: 0000000020000340 RSI: 0000000000008914 RDI: 0000000000000003
+RBP: 00000000006dbc50 R08: 00007fdbea223700 R09: 0000000000000000
+R10: 00007fdbea223700 R11: 0000000000000246 R12: 00000000006dbc5c
+R13: 6000030030626669 R14: 0000000000000000 R15: 0000000030626669
+
+Allocated by task 7843:
+ save_stack+0x45/0xd0 mm/kasan/common.c:73
+ set_track mm/kasan/common.c:85 [inline]
+ __kasan_kmalloc mm/kasan/common.c:495 [inline]
+ __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:468
+ kasan_kmalloc+0x9/0x10 mm/kasan/common.c:509
+ kmem_cache_alloc_trace+0x151/0x760 mm/slab.c:3615
+ kmalloc include/linux/slab.h:545 [inline]
+ x25_link_device_up+0x46/0x3f0 net/x25/x25_link.c:249
+ x25_device_event+0x116/0x2b0 net/x25/af_x25.c:242
+ notifier_call_chain+0xc7/0x240 kernel/notifier.c:93
+ __raw_notifier_call_chain kernel/notifier.c:394 [inline]
+ raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401
+ call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739
+ call_netdevice_notifiers_extack net/core/dev.c:1751 [inline]
+ call_netdevice_notifiers net/core/dev.c:1765 [inline]
+ __dev_notify_flags+0x121/0x2c0 net/core/dev.c:7605
+ dev_change_flags+0x10d/0x170 net/core/dev.c:7643
+ dev_ifsioc+0x2b0/0x940 net/core/dev_ioctl.c:237
+ dev_ioctl+0x1b8/0xc70 net/core/dev_ioctl.c:488
+ sock_do_ioctl+0x1bd/0x300 net/socket.c:995
+ sock_ioctl+0x32b/0x610 net/socket.c:1096
+ vfs_ioctl fs/ioctl.c:46 [inline]
+ file_ioctl fs/ioctl.c:509 [inline]
+ do_vfs_ioctl+0xd6e/0x1390 fs/ioctl.c:696
+ ksys_ioctl+0xab/0xd0 fs/ioctl.c:713
+ __do_sys_ioctl fs/ioctl.c:720 [inline]
+ __se_sys_ioctl fs/ioctl.c:718 [inline]
+ __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718
+ do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+Freed by task 7865:
+ save_stack+0x45/0xd0 mm/kasan/common.c:73
+ set_track mm/kasan/common.c:85 [inline]
+ __kasan_slab_free+0x102/0x150 mm/kasan/common.c:457
+ kasan_slab_free+0xe/0x10 mm/kasan/common.c:465
+ __cache_free mm/slab.c:3494 [inline]
+ kfree+0xcf/0x230 mm/slab.c:3811
+ x25_neigh_put include/net/x25.h:253 [inline]
+ x25_connect+0x8d8/0xde0 net/x25/af_x25.c:824
+ __sys_connect+0x266/0x330 net/socket.c:1685
+ __do_sys_connect net/socket.c:1696 [inline]
+ __se_sys_connect net/socket.c:1693 [inline]
+ __x64_sys_connect+0x73/0xb0 net/socket.c:1693
+ do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+The buggy address belongs to the object at ffff8880a030edc0
+ which belongs to the cache kmalloc-256 of size 256
+The buggy address is located 16 bytes inside of
+ 256-byte region [ffff8880a030edc0, ffff8880a030eec0)
+The buggy address belongs to the page:
+page:ffffea000280c380 count:1 mapcount:0 mapping:ffff88812c3f07c0 index:0x0
+flags: 0x1fffc0000000200(slab)
+raw: 01fffc0000000200 ffffea0002806788 ffffea00027f0188 ffff88812c3f07c0
+raw: 0000000000000000 ffff8880a030e000 000000010000000c 0000000000000000
+page dumped because: kasan: bad access detected
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot+04babcefcd396fabec37@syzkaller.appspotmail.com
+Cc: andrew hendry <andrew.hendry@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/x25/af_x25.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/x25/af_x25.c
++++ b/net/x25/af_x25.c
+@@ -812,8 +812,12 @@ static int x25_connect(struct socket *so
+       sock->state = SS_CONNECTED;
+       rc = 0;
+ out_put_neigh:
+-      if (rc)
++      if (rc) {
++              read_lock_bh(&x25_list_lock);
+               x25_neigh_put(x25->neighbour);
++              x25->neighbour = NULL;
++              read_unlock_bh(&x25_list_lock);
++      }
+ out_put_route:
+       x25_route_put(rt);
+ out:
diff --git a/queue-4.9/net-x25-reset-state-in-x25_connect.patch b/queue-4.9/net-x25-reset-state-in-x25_connect.patch
new file mode 100644 (file)
index 0000000..7424991
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 11 Mar 2019 13:48:44 -0700
+Subject: net/x25: reset state in x25_connect()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ee74d0bd4325efb41e38affe5955f920ed973f23 ]
+
+In case x25_connect() fails and frees the socket neighbour,
+we also need to undo the change done to x25->state.
+
+Before my last bug fix, we had use-after-free so this
+patch fixes a latent bug.
+
+syzbot report :
+
+kasan: CONFIG_KASAN_INLINE enabled
+kasan: GPF could be caused by NULL-ptr deref or user memory access
+general protection fault: 0000 [#1] PREEMPT SMP KASAN
+CPU: 1 PID: 16137 Comm: syz-executor.1 Not tainted 5.0.0+ #117
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:x25_write_internal+0x1e8/0xdf0 net/x25/x25_subr.c:173
+Code: 00 40 88 b5 e0 fe ff ff 0f 85 01 0b 00 00 48 8b 8b 80 04 00 00 48 ba 00 00 00 00 00 fc ff df 48 8d 79 1c 48 89 fe 48 c1 ee 03 <0f> b6 34 16 48 89 fa 83 e2 07 83 c2 03 40 38 f2 7c 09 40 84 f6 0f
+RSP: 0018:ffff888076717a08 EFLAGS: 00010207
+RAX: ffff88805f2f2292 RBX: ffff8880a0ae6000 RCX: 0000000000000000
+kobject: 'loop5' (0000000018d0d0ee): kobject_uevent_env
+RDX: dffffc0000000000 RSI: 0000000000000003 RDI: 000000000000001c
+RBP: ffff888076717b40 R08: ffff8880950e0580 R09: ffffed100be5e46d
+R10: ffffed100be5e46c R11: ffff88805f2f2363 R12: ffff888065579840
+kobject: 'loop5' (0000000018d0d0ee): fill_kobj_path: path = '/devices/virtual/block/loop5'
+R13: 1ffff1100ece2f47 R14: 0000000000000013 R15: 0000000000000013
+FS:  00007fb88cf43700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f9a42a41028 CR3: 0000000087a67000 CR4: 00000000001406e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ x25_release+0xd0/0x340 net/x25/af_x25.c:658
+ __sock_release+0xd3/0x2b0 net/socket.c:579
+ sock_close+0x1b/0x30 net/socket.c:1162
+ __fput+0x2df/0x8d0 fs/file_table.c:278
+ ____fput+0x16/0x20 fs/file_table.c:309
+ task_work_run+0x14a/0x1c0 kernel/task_work.c:113
+ get_signal+0x1961/0x1d50 kernel/signal.c:2388
+ do_signal+0x87/0x1940 arch/x86/kernel/signal.c:816
+ exit_to_usermode_loop+0x244/0x2c0 arch/x86/entry/common.c:162
+ prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
+ syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
+ do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x457f29
+Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007fb88cf42c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a
+RAX: fffffffffffffe00 RBX: 0000000000000003 RCX: 0000000000457f29
+RDX: 0000000000000012 RSI: 0000000020000080 RDI: 0000000000000004
+RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb88cf436d4
+R13: 00000000004be462 R14: 00000000004cec98 R15: 00000000ffffffff
+Modules linked in:
+
+Fixes: 95d6ebd53c79 ("net/x25: fix use-after-free in x25_device_event()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: andrew hendry <andrew.hendry@gmail.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/x25/af_x25.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/x25/af_x25.c
++++ b/net/x25/af_x25.c
+@@ -817,6 +817,7 @@ out_put_neigh:
+               x25_neigh_put(x25->neighbour);
+               x25->neighbour = NULL;
+               read_unlock_bh(&x25_list_lock);
++              x25->state = X25_STATE_0;
+       }
+ out_put_route:
+       x25_route_put(rt);
diff --git a/queue-4.9/pptp-dst_release-sk_dst_cache-in-pptp_sock_destruct.patch b/queue-4.9/pptp-dst_release-sk_dst_cache-in-pptp_sock_destruct.patch
new file mode 100644 (file)
index 0000000..7cea599
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 13 Mar 2019 17:00:48 +0800
+Subject: pptp: dst_release sk_dst_cache in pptp_sock_destruct
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 9417d81f4f8adfe20a12dd1fadf73a618cbd945d ]
+
+sk_setup_caps() is called to set sk->sk_dst_cache in pptp_connect,
+so we have to dst_release(sk->sk_dst_cache) in pptp_sock_destruct,
+otherwise, the dst refcnt will leak.
+
+It can be reproduced by this syz log:
+
+  r1 = socket$pptp(0x18, 0x1, 0x2)
+  bind$pptp(r1, &(0x7f0000000100)={0x18, 0x2, {0x0, @local}}, 0x1e)
+  connect$pptp(r1, &(0x7f0000000000)={0x18, 0x2, {0x3, @remote}}, 0x1e)
+
+Consecutive dmesg warnings will occur:
+
+  unregister_netdevice: waiting for lo to become free. Usage count = 1
+
+v1->v2:
+  - use rcu_dereference_protected() instead of rcu_dereference_check(),
+    as suggested by Eric.
+
+Fixes: 00959ade36ac ("PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol)")
+Reported-by: Xiumei Mu <xmu@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/pptp.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ppp/pptp.c
++++ b/drivers/net/ppp/pptp.c
+@@ -541,6 +541,7 @@ static void pptp_sock_destruct(struct so
+               pppox_unbind_sock(sk);
+       }
+       skb_queue_purge(&sk->sk_receive_queue);
++      dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
+ }
+ static int pptp_create(struct net *net, struct socket *sock, int kern)
diff --git a/queue-4.9/ravb-decrease-txfifo-depth-of-q3-and-q2-to-one.patch b/queue-4.9/ravb-decrease-txfifo-depth-of-q3-and-q2-to-one.patch
new file mode 100644 (file)
index 0000000..26f0715
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Masaru Nagai <masaru.nagai.vx@renesas.com>
+Date: Thu, 7 Mar 2019 11:24:47 +0100
+Subject: ravb: Decrease TxFIFO depth of Q3 and Q2 to one
+
+From: Masaru Nagai <masaru.nagai.vx@renesas.com>
+
+[ Upstream commit ae9819e339b451da7a86ab6fe38ecfcb6814e78a ]
+
+Hardware has the CBS (Credit Based Shaper) which affects only Q3
+and Q2. When updating the CBS settings, even if the driver does so
+after waiting for Tx DMA to finish, there is a possibility that frame
+data still remains in TxFIFO.
+
+To avoid this, decrease TxFIFO depth of Q3 and Q2 to one.
+
+This patch has been exercised using netperf TCP_MAERTS, TCP_STREAM
+and UDP_STREAM tests run on an Ebisu board. No performance change was
+detected, outside of noise in the tests, both in terms of throughput and
+CPU utilisation.
+
+Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper")
+Signed-off-by: Masaru Nagai <masaru.nagai.vx@renesas.com>
+Signed-off-by: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>
+[simon: updated changelog]
+Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/renesas/ravb_main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/renesas/ravb_main.c
++++ b/drivers/net/ethernet/renesas/ravb_main.c
+@@ -457,7 +457,7 @@ static int ravb_dmac_init(struct net_dev
+                  RCR_EFFS | RCR_ENCF | RCR_ETS0 | RCR_ESF | 0x18000000, RCR);
+       /* Set FIFO size */
+-      ravb_write(ndev, TGC_TQP_AVBMODE1 | 0x00222200, TGC);
++      ravb_write(ndev, TGC_TQP_AVBMODE1 | 0x00112200, TGC);
+       /* Timestamp enable */
+       ravb_write(ndev, TCCR_TFEN, TCCR);
diff --git a/queue-4.9/route-set-the-deleted-fnhe-fnhe_daddr-to-0-in-ip_del_fnhe-to-fix-a-race.patch b/queue-4.9/route-set-the-deleted-fnhe-fnhe_daddr-to-0-in-ip_del_fnhe-to-fix-a-race.patch
new file mode 100644 (file)
index 0000000..ebc35bf
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Xin Long <lucien.xin@gmail.com>
+Date: Fri, 8 Mar 2019 14:50:54 +0800
+Subject: route: set the deleted fnhe fnhe_daddr to 0 in ip_del_fnhe to fix a race
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit ee60ad219f5c7c4fb2f047f88037770063ef785f ]
+
+The race occurs in __mkroute_output() when 2 threads lookup a dst:
+
+  CPU A                 CPU B
+  find_exception()
+                        find_exception() [fnhe expires]
+                        ip_del_fnhe() [fnhe is deleted]
+  rt_bind_exception()
+
+In rt_bind_exception() it will bind a deleted fnhe with the new dst, and
+this dst will get no chance to be freed. It causes a dev refcnt leak and
+consecutive dmesg warnings:
+
+  unregister_netdevice: waiting for ethX to become free. Usage count = 1
+
+Special thanks to Jon for identifying the issue.
+
+This patch fixes it by setting fnhe_daddr to 0 in ip_del_fnhe() to stop
+binding the deleted fnhe with a new dst when checking fnhe's fnhe_daddr
+and daddr in rt_bind_exception().
+
+It works as both ip_del_fnhe() and rt_bind_exception() are protected by
+fnhe_lock and the fnhe is freed by kfree_rcu().
+
+Fixes: deed49df7390 ("route: check and remove route cache when we get route")
+Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1613,6 +1613,10 @@ static void ip_del_fnhe(struct fib_nh *n
+               if (fnhe->fnhe_daddr == daddr) {
+                       rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+                               fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
++                      /* set fnhe_daddr to 0 to ensure it won't bind with
++                       * new dsts in rt_bind_exception().
++                       */
++                      fnhe->fnhe_daddr = 0;
+                       fnhe_flush_routes(fnhe);
+                       kfree_rcu(fnhe, rcu);
+                       break;
diff --git a/queue-4.9/rxrpc-fix-client-call-queueing-waiting-for-channel.patch b/queue-4.9/rxrpc-fix-client-call-queueing-waiting-for-channel.patch
new file mode 100644 (file)
index 0000000..ff9f594
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: David Howells <dhowells@redhat.com>
+Date: Sat, 9 Mar 2019 00:29:58 +0000
+Subject: rxrpc: Fix client call queueing, waiting for channel
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 69ffaebb90369ce08657b5aea4896777b9d6e8fc ]
+
+rxrpc_get_client_conn() adds a new call to the front of the waiting_calls
+queue if the connection it's going to use already exists.  This is bad as
+it allows calls to get starved out.
+
+Fix this by adding to the tail instead.
+
+Also change the other enqueue point in the same function to put it on the
+front (ie. when we have a new connection).  This makes the point that in
+the case of a new connection the new call goes at the front (though it
+doesn't actually matter since the queue should be unoccupied).
+
+Fixes: 45025bceef17 ("rxrpc: Improve management and caching of client connection objects")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rxrpc/conn_client.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/rxrpc/conn_client.c
++++ b/net/rxrpc/conn_client.c
+@@ -355,7 +355,7 @@ static int rxrpc_get_client_conn(struct
+        * normally have to take channel_lock but we do this before anyone else
+        * can see the connection.
+        */
+-      list_add_tail(&call->chan_wait_link, &candidate->waiting_calls);
++      list_add(&call->chan_wait_link, &candidate->waiting_calls);
+       if (cp->exclusive) {
+               call->conn = candidate;
+@@ -430,7 +430,7 @@ found_extant_conn:
+       spin_lock(&conn->channel_lock);
+       call->conn = conn;
+       call->security_ix = conn->security_ix;
+-      list_add(&call->chan_wait_link, &conn->waiting_calls);
++      list_add_tail(&call->chan_wait_link, &conn->waiting_calls);
+       spin_unlock(&conn->channel_lock);
+       _leave(" = 0 [extant %d]", conn->debug_id);
+       return 0;
index 8412ef33007025bba0d3256ded8b26391f18dc6b..bab8a958f2ab6a46a11ae333a82b7404e31c94ce 100644 (file)
@@ -1,2 +1,24 @@
 acpica-reference-counts-increase-max-to-0x4000-for-large-servers.patch
 keys-restrict-proc-keys-by-credentials-at-open-time.patch
+l2tp-fix-infoleak-in-l2tp_ip6_recvmsg.patch
+net-hsr-fix-memory-leak-in-hsr_dev_finalize.patch
+net-hsr-fix-possible-crash-in-add_timer.patch
+net-sit-fix-ubsan-undefined-behaviour-in-check_6rd.patch
+net-x25-fix-use-after-free-in-x25_device_event.patch
+net-x25-reset-state-in-x25_connect.patch
+pptp-dst_release-sk_dst_cache-in-pptp_sock_destruct.patch
+ravb-decrease-txfifo-depth-of-q3-and-q2-to-one.patch
+route-set-the-deleted-fnhe-fnhe_daddr-to-0-in-ip_del_fnhe-to-fix-a-race.patch
+tcp-handle-inet_csk_reqsk_queue_add-failures.patch
+vxlan-test-dev-flags-iff_up-before-calling-gro_cells_receive.patch
+net-mlx4_core-fix-reset-flow-when-in-command-polling-mode.patch
+net-mlx4_core-fix-locking-in-sriov-mode-when-switching-between-events-and-polling.patch
+net-mlx4_core-fix-qp-mtt-size-calculation.patch
+net-x25-fix-a-race-in-x25_bind.patch
+mdio_bus-fix-use-after-free-on-device_register-fails.patch
+net-set-rtm_table-to-rt_table_compat-for-ipv6-for-tables-255.patch
+missing-barriers-in-some-of-unix_sock-addr-and-path-accesses.patch
+ipvlan-disallow-userns-cap_net_admin-to-change-global-mode-flags.patch
+vxlan-fix-gro-cells-race-condition-between-receive-and-link-delete.patch
+rxrpc-fix-client-call-queueing-waiting-for-channel.patch
+gro_cells-make-sure-device-is-up-in-gro_cells_receive.patch
diff --git a/queue-4.9/tcp-handle-inet_csk_reqsk_queue_add-failures.patch b/queue-4.9/tcp-handle-inet_csk_reqsk_queue_add-failures.patch
new file mode 100644 (file)
index 0000000..55923b5
--- /dev/null
@@ -0,0 +1,66 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Guillaume Nault <gnault@redhat.com>
+Date: Fri, 8 Mar 2019 22:09:47 +0100
+Subject: tcp: handle inet_csk_reqsk_queue_add() failures
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[  Upstream commit 9d3e1368bb45893a75a5dfb7cd21fdebfa6b47af ]
+
+Commit 7716682cc58e ("tcp/dccp: fix another race at listener
+dismantle") let inet_csk_reqsk_queue_add() fail, and adjusted
+{tcp,dccp}_check_req() accordingly. However, TFO and syncookies
+weren't modified, thus leaking allocated resources on error.
+
+Contrary to tcp_check_req(), in both syncookies and TFO cases,
+we need to drop the request socket. Also, since the child socket is
+created with inet_csk_clone_lock(), we have to unlock it and drop an
+extra reference (->sk_refcount is initially set to 2 and
+inet_csk_reqsk_queue_add() drops only one ref).
+
+For TFO, we also need to revert the work done by tcp_try_fastopen()
+(with reqsk_fastopen_remove()).
+
+Fixes: 7716682cc58e ("tcp/dccp: fix another race at listener dismantle")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/syncookies.c |    7 ++++++-
+ net/ipv4/tcp_input.c  |    8 +++++++-
+ 2 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -225,7 +225,12 @@ struct sock *tcp_get_cookie_sock(struct
+       if (child) {
+               atomic_set(&req->rsk_refcnt, 1);
+               sock_rps_save_rxhash(child, skb);
+-              inet_csk_reqsk_queue_add(sk, req, child);
++              if (!inet_csk_reqsk_queue_add(sk, req, child)) {
++                      bh_unlock_sock(child);
++                      sock_put(child);
++                      child = NULL;
++                      reqsk_put(req);
++              }
+       } else {
+               reqsk_free(req);
+       }
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -6479,7 +6479,13 @@ int tcp_conn_request(struct request_sock
+               af_ops->send_synack(fastopen_sk, dst, &fl, req,
+                                   &foc, TCP_SYNACK_FASTOPEN);
+               /* Add the child socket directly into the accept queue */
+-              inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
++              if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
++                      reqsk_fastopen_remove(fastopen_sk, req, false);
++                      bh_unlock_sock(fastopen_sk);
++                      sock_put(fastopen_sk);
++                      reqsk_put(req);
++                      goto drop;
++              }
+               sk->sk_data_ready(sk);
+               bh_unlock_sock(fastopen_sk);
+               sock_put(fastopen_sk);
diff --git a/queue-4.9/vxlan-fix-gro-cells-race-condition-between-receive-and-link-delete.patch b/queue-4.9/vxlan-fix-gro-cells-race-condition-between-receive-and-link-delete.patch
new file mode 100644 (file)
index 0000000..ebcfb60
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Fri, 8 Mar 2019 16:40:57 +0100
+Subject: vxlan: Fix GRO cells race condition between receive and link delete
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+[ Upstream commit ad6c9986bcb627c7c22b8f9e9a934becc27df87c ]
+
+If we receive a packet while deleting a VXLAN device, there's a chance
+vxlan_rcv() is called at the same time as vxlan_dellink(). This is fine,
+except that vxlan_dellink() should never ever touch stuff that's still in
+use, such as the GRO cells list.
+
+Otherwise, vxlan_rcv() crashes while queueing packets via
+gro_cells_receive().
+
+Move the gro_cells_destroy() to vxlan_uninit(), which runs after the RCU
+grace period has elapsed and nothing needs the gro_cells anymore.
+
+This is now done in the same way as commit 8e816df87997 ("geneve: Use GRO
+cells infrastructure.") originally implemented for GENEVE.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Fixes: 58ce31cca1ff ("vxlan: GRO support at tunnel layer")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2373,6 +2373,8 @@ static void vxlan_uninit(struct net_devi
+ {
+       struct vxlan_dev *vxlan = netdev_priv(dev);
++      gro_cells_destroy(&vxlan->gro_cells);
++
+       vxlan_fdb_delete_default(vxlan);
+       free_percpu(dev->tstats);
+@@ -3123,7 +3125,6 @@ static void vxlan_dellink(struct net_dev
+ {
+       struct vxlan_dev *vxlan = netdev_priv(dev);
+-      gro_cells_destroy(&vxlan->gro_cells);
+       list_del(&vxlan->next);
+       unregister_netdevice_queue(dev, head);
+ }
diff --git a/queue-4.9/vxlan-test-dev-flags-iff_up-before-calling-gro_cells_receive.patch b/queue-4.9/vxlan-test-dev-flags-iff_up-before-calling-gro_cells_receive.patch
new file mode 100644 (file)
index 0000000..5c3ccd8
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Fri Mar 15 21:00:09 PDT 2019
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 10 Mar 2019 10:36:40 -0700
+Subject: vxlan: test dev->flags & IFF_UP before calling gro_cells_receive()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 59cbf56fcd98ba2a715b6e97c4e43f773f956393 ]
+
+Same reasons as the ones explained in commit 4179cb5a4c92
+("vxlan: test dev->flags & IFF_UP before calling netif_rx()")
+
+netif_rx() or gro_cells_receive() must be called under a strict contract.
+
+At device dismantle phase, core networking clears IFF_UP
+and flush_all_backlogs() is called after rcu grace period
+to make sure no incoming packet might be in a cpu backlog
+and still referencing the device.
+
+A similar protocol is used for gro_cells infrastructure, as
+gro_cells_destroy() will be called only after a full rcu
+grace period is observed after IFF_UP has been cleared.
+
+Most drivers call netif_rx() from their interrupt handler,
+and since the interrupts are disabled at device dismantle,
+netif_rx() does not have to check dev->flags & IFF_UP
+
+Virtual drivers do not have this guarantee, and must
+therefore make the check themselves.
+
+Otherwise we risk use-after-free and/or crashes.
+
+Fixes: d342894c5d2f ("vxlan: virtual extensible lan")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -1380,6 +1380,14 @@ static int vxlan_rcv(struct sock *sk, st
+               goto drop;
+       }
++      rcu_read_lock();
++
++      if (unlikely(!(vxlan->dev->flags & IFF_UP))) {
++              rcu_read_unlock();
++              atomic_long_inc(&vxlan->dev->rx_dropped);
++              goto drop;
++      }
++
+       stats = this_cpu_ptr(vxlan->dev->tstats);
+       u64_stats_update_begin(&stats->syncp);
+       stats->rx_packets++;
+@@ -1387,6 +1395,9 @@ static int vxlan_rcv(struct sock *sk, st
+       u64_stats_update_end(&stats->syncp);
+       gro_cells_receive(&vxlan->gro_cells, skb);
++
++      rcu_read_unlock();
++
+       return 0;
+ drop: