From: Greg Kroah-Hartman Date: Thu, 8 Dec 2016 06:20:44 +0000 (+0100) Subject: 4.8-stable patches X-Git-Tag: v4.4.38~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7df090bc2695be68b69b7f0f8236def7673e8be8;p=thirdparty%2Fkernel%2Fstable-queue.git 4.8-stable patches added patches: af_unix-conditionally-use-freezable-blocking-calls-in-read.patch cdc_ether-fix-handling-connection-notification.patch geneve-avoid-use-after-free-of-skb-data.patch gro_cells-mark-napi-struct-as-not-busy-poll-candidates.patch gso-reload-iph-after-pskb_may_pull.patch ip6_offload-check-segs-for-null-in-ipv6_gso_segment.patch ip6_tunnel-disable-caching-when-the-traffic-class-is-inherited.patch ipv4-drop-leaf-from-suffix-pull-push-functions.patch ipv4-drop-suffix-update-from-resize-code.patch ipv4-fix-memory-leak-in-exception-case-for-splitting-tries.patch ipv4-restore-fib_trie_flush_external-function-and-fix-call-ordering.patch ipv6-bump-genid-when-the-ifa_f_tentative-flag-is-clear.patch l2tp-fix-racy-sock_zapped-flag-check-in-l2tp_ip-6-_bind.patch net-avoid-signed-overflows-for-so_-snd-rcv-bufforce.patch net-bcmgenet-utilize-correct-struct-device-for-all-dma-operations.patch net-check-dead-netns-for-peernet2id_alloc.patch net-dccp-fix-use-after-free-in-dccp_invalid_packet.patch net-dsa-b53-fix-vlan-usage-and-how-we-treat-cpu-port.patch net-dsa-bcm_sf2-ensure-we-re-negotiate-eee-during-after-link-change.patch net-dsa-fix-unbalanced-dsa_switch_tree-reference-counting.patch net-macb-fix-the-rx-queue-reset-in-macb_rx.patch net-ping-check-minimum-size-on-icmp-header-length.patch net-sched-pedit-make-sure-that-offset-is-valid.patch net-sched-respect-rcu-grace-period-on-cls-destruction.patch net-sky2-fix-shutdown-crash.patch netlink-call-cb-done-from-a-worker-thread.patch netlink-do-not-schedule-work-from-sk_destruct.patch packet-fix-race-condition-in-packet_set_ring.patch rtnetlink-fix-fdb-size-computation.patch rtnl-fix-the-loop-index-update-error-in-rtnl_dump_ifinfo.patch 
sh_eth-remove-unchecked-interrupts-for-rz-a1.patch sparc32-fix-inverted-invalid_frame_pointer-checks-on-sigreturns.patch sparc64-fix-compile-warning-section-mismatch-in-find_node.patch sparc64-fix-find_node-warning-if-numa-node-cannot-be-found.patch tipc-check-minimum-bearer-mtu.patch udplite-call-proper-backlog-handlers.patch virtio-net-add-a-missing-synchronize_net.patch --- diff --git a/queue-4.8/af_unix-conditionally-use-freezable-blocking-calls-in-read.patch b/queue-4.8/af_unix-conditionally-use-freezable-blocking-calls-in-read.patch new file mode 100644 index 00000000000..c3439236f16 --- /dev/null +++ b/queue-4.8/af_unix-conditionally-use-freezable-blocking-calls-in-read.patch @@ -0,0 +1,102 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: WANG Cong +Date: Thu, 17 Nov 2016 15:55:26 -0800 +Subject: af_unix: conditionally use freezable blocking calls in read + +From: WANG Cong + + +[ Upstream commit 06a77b07e3b44aea2b3c0e64de420ea2cfdcbaa9 ] + +Commit 2b15af6f95 ("af_unix: use freezable blocking calls in read") +converts schedule_timeout() to its freezable version, it was probably +correct at that time, but later, commit 2b514574f7e8 +("net: af_unix: implement splice for stream af_unix sockets") breaks +the strong requirement for a freezable sleep, according to +commit 0f9548ca1091: + + We shouldn't try_to_freeze if locks are held. Holding a lock can cause a + deadlock if the lock is later acquired in the suspend or hibernate path + (e.g. by dpm). Holding a lock can also cause a deadlock in the case of + cgroup_freezer if a lock is held inside a frozen cgroup that is later + acquired by a process outside that group. + +The pipe_lock is still held at that point. + +So use freezable version only for the recvmsg call path, avoid impact for +Android. + +Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets") +Reported-by: Dmitry Vyukov +Cc: Tejun Heo +Cc: Colin Cross +Cc: Rafael J. 
Wysocki +Cc: Hannes Frederic Sowa +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2199,7 +2199,8 @@ out: + * Sleep until more data has arrived. But check for races.. + */ + static long unix_stream_data_wait(struct sock *sk, long timeo, +- struct sk_buff *last, unsigned int last_len) ++ struct sk_buff *last, unsigned int last_len, ++ bool freezable) + { + struct sk_buff *tail; + DEFINE_WAIT(wait); +@@ -2220,7 +2221,10 @@ static long unix_stream_data_wait(struct + + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + unix_state_unlock(sk); +- timeo = freezable_schedule_timeout(timeo); ++ if (freezable) ++ timeo = freezable_schedule_timeout(timeo); ++ else ++ timeo = schedule_timeout(timeo); + unix_state_lock(sk); + + if (sock_flag(sk, SOCK_DEAD)) +@@ -2250,7 +2254,8 @@ struct unix_stream_read_state { + unsigned int splice_flags; + }; + +-static int unix_stream_read_generic(struct unix_stream_read_state *state) ++static int unix_stream_read_generic(struct unix_stream_read_state *state, ++ bool freezable) + { + struct scm_cookie scm; + struct socket *sock = state->socket; +@@ -2330,7 +2335,7 @@ again: + mutex_unlock(&u->iolock); + + timeo = unix_stream_data_wait(sk, timeo, last, +- last_len); ++ last_len, freezable); + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); +@@ -2472,7 +2477,7 @@ static int unix_stream_recvmsg(struct so + .flags = flags + }; + +- return unix_stream_read_generic(&state); ++ return unix_stream_read_generic(&state, true); + } + + static ssize_t skb_unix_socket_splice(struct sock *sk, +@@ -2518,7 +2523,7 @@ static ssize_t unix_stream_splice_read(s + flags & SPLICE_F_NONBLOCK) + state.flags = MSG_DONTWAIT; + +- return unix_stream_read_generic(&state); ++ return unix_stream_read_generic(&state, false); + } + + static int unix_shutdown(struct 
socket *sock, int mode) diff --git a/queue-4.8/cdc_ether-fix-handling-connection-notification.patch b/queue-4.8/cdc_ether-fix-handling-connection-notification.patch new file mode 100644 index 00000000000..c1464628d92 --- /dev/null +++ b/queue-4.8/cdc_ether-fix-handling-connection-notification.patch @@ -0,0 +1,109 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Kristian Evensen +Date: Thu, 1 Dec 2016 14:23:17 +0100 +Subject: cdc_ether: Fix handling connection notification + +From: Kristian Evensen + + +[ Upstream commit d5c83d0d1d83b3798c71e0c8b7c3624d39c91d88 ] + +Commit bfe9b9d2df66 ("cdc_ether: Improve ZTE MF823/831/910 handling") +introduced a work-around in usbnet_cdc_status() for devices that exported +cdc carrier on twice on connect. Before the commit, this behavior caused +the link state to be incorrect. It was assumed that all CDC Ethernet +devices would either export this behavior, or send one off and then one on +notification (which seems to be the default behavior). + +Unfortunately, it turns out multiple devices sends a connection +notification multiple times per second (via an interrupt), even when +connection state does not change. This has been observed with several +different USB LAN dongles (at least), for example 13b1:0041 (Linksys). +After bfe9b9d2df66, the link state has been set as down and then up for +each notification. This has caused a flood of Netlink NEWLINK messages and +syslog to be flooded with messages similar to: + +cdc_ether 2-1:2.0 eth1: kevent 12 may have been dropped + +This commit fixes the behavior by reverting usbnet_cdc_status() to how it +was before bfe9b9d2df66. The work-around has been moved to a separate +status-function which is only called when a known, affect device is +detected. + +v1->v2: + +* Do not open-code netif_carrier_ok() (thanks Henning Schild). +* Call netif_carrier_off() instead of usb_link_change(). 
This prevents +calling schedule_work() twice without giving the work queue a chance to be +processed (thanks Bjørn Mork). + +Fixes: bfe9b9d2df66 ("cdc_ether: Improve ZTE MF823/831/910 handling") +Reported-by: Henning Schild +Signed-off-by: Kristian Evensen +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/cdc_ether.c | 38 +++++++++++++++++++++++++++++++------- + 1 file changed, 31 insertions(+), 7 deletions(-) + +--- a/drivers/net/usb/cdc_ether.c ++++ b/drivers/net/usb/cdc_ether.c +@@ -388,12 +388,6 @@ void usbnet_cdc_status(struct usbnet *de + case USB_CDC_NOTIFY_NETWORK_CONNECTION: + netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n", + event->wValue ? "on" : "off"); +- +- /* Work-around for devices with broken off-notifications */ +- if (event->wValue && +- !test_bit(__LINK_STATE_NOCARRIER, &dev->net->state)) +- usbnet_link_change(dev, 0, 0); +- + usbnet_link_change(dev, !!event->wValue, 0); + break; + case USB_CDC_NOTIFY_SPEED_CHANGE: /* tx/rx rates */ +@@ -466,6 +460,36 @@ static int usbnet_cdc_zte_rx_fixup(struc + return 1; + } + ++/* Ensure correct link state ++ * ++ * Some devices (ZTE MF823/831/910) export two carrier on notifications when ++ * connected. This causes the link state to be incorrect. Work around this by ++ * always setting the state to off, then on. ++ */ ++void usbnet_cdc_zte_status(struct usbnet *dev, struct urb *urb) ++{ ++ struct usb_cdc_notification *event; ++ ++ if (urb->actual_length < sizeof(*event)) ++ return; ++ ++ event = urb->transfer_buffer; ++ ++ if (event->bNotificationType != USB_CDC_NOTIFY_NETWORK_CONNECTION) { ++ usbnet_cdc_status(dev, urb); ++ return; ++ } ++ ++ netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n", ++ event->wValue ? 
"on" : "off"); ++ ++ if (event->wValue && ++ netif_carrier_ok(dev->net)) ++ netif_carrier_off(dev->net); ++ ++ usbnet_link_change(dev, !!event->wValue, 0); ++} ++ + static const struct driver_info cdc_info = { + .description = "CDC Ethernet Device", + .flags = FLAG_ETHER | FLAG_POINTTOPOINT, +@@ -481,7 +505,7 @@ static const struct driver_info zte_cdc_ + .flags = FLAG_ETHER | FLAG_POINTTOPOINT, + .bind = usbnet_cdc_zte_bind, + .unbind = usbnet_cdc_unbind, +- .status = usbnet_cdc_status, ++ .status = usbnet_cdc_zte_status, + .set_rx_mode = usbnet_cdc_update_filter, + .manage_power = usbnet_manage_power, + .rx_fixup = usbnet_cdc_zte_rx_fixup, diff --git a/queue-4.8/geneve-avoid-use-after-free-of-skb-data.patch b/queue-4.8/geneve-avoid-use-after-free-of-skb-data.patch new file mode 100644 index 00000000000..dc8d5dd568d --- /dev/null +++ b/queue-4.8/geneve-avoid-use-after-free-of-skb-data.patch @@ -0,0 +1,96 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Sabrina Dubroca +Date: Fri, 2 Dec 2016 16:49:29 +0100 +Subject: geneve: avoid use-after-free of skb->data + +From: Sabrina Dubroca + + +[ Upstream commit 5b01014759991887b1e450c9def01e58c02ab81b ] + +geneve{,6}_build_skb can end up doing a pskb_expand_head(), which +makes the ip_hdr(skb) reference we stashed earlier stale. Since it's +only needed as an argument to ip_tunnel_ecn_encap(), move this +directly in the function call. + +Fixes: 08399efc6319 ("geneve: ensure ECN info is handled properly in all tx/rx paths") +Signed-off-by: Sabrina Dubroca +Reviewed-by: John W. Linville +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/geneve.c | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -844,7 +844,6 @@ static netdev_tx_t geneve_xmit_skb(struc + struct geneve_dev *geneve = netdev_priv(dev); + struct geneve_sock *gs4 = geneve->sock4; + struct rtable *rt = NULL; +- const struct iphdr *iip; /* interior IP header */ + int err = -EINVAL; + struct flowi4 fl4; + __u8 tos, ttl; +@@ -871,8 +870,6 @@ static netdev_tx_t geneve_xmit_skb(struc + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + skb_reset_mac_header(skb); + +- iip = ip_hdr(skb); +- + if (info) { + const struct ip_tunnel_key *key = &info->key; + u8 *opts = NULL; +@@ -892,7 +889,7 @@ static netdev_tx_t geneve_xmit_skb(struc + if (unlikely(err)) + goto tx_error; + +- tos = ip_tunnel_ecn_encap(key->tos, iip, skb); ++ tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); + ttl = key->ttl; + df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? 
htons(IP_DF) : 0; + } else { +@@ -901,7 +898,7 @@ static netdev_tx_t geneve_xmit_skb(struc + if (unlikely(err)) + goto tx_error; + +- tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb); ++ tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb); + ttl = geneve->ttl; + if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) + ttl = 1; +@@ -934,7 +931,6 @@ static netdev_tx_t geneve6_xmit_skb(stru + struct geneve_dev *geneve = netdev_priv(dev); + struct geneve_sock *gs6 = geneve->sock6; + struct dst_entry *dst = NULL; +- const struct iphdr *iip; /* interior IP header */ + int err = -EINVAL; + struct flowi6 fl6; + __u8 prio, ttl; +@@ -959,8 +955,6 @@ static netdev_tx_t geneve6_xmit_skb(stru + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + skb_reset_mac_header(skb); + +- iip = ip_hdr(skb); +- + if (info) { + const struct ip_tunnel_key *key = &info->key; + u8 *opts = NULL; +@@ -981,7 +975,7 @@ static netdev_tx_t geneve6_xmit_skb(stru + if (unlikely(err)) + goto tx_error; + +- prio = ip_tunnel_ecn_encap(key->tos, iip, skb); ++ prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); + ttl = key->ttl; + label = info->key.label; + } else { +@@ -991,7 +985,7 @@ static netdev_tx_t geneve6_xmit_skb(stru + goto tx_error; + + prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel), +- iip, skb); ++ ip_hdr(skb), skb); + ttl = geneve->ttl; + if (!ttl && ipv6_addr_is_multicast(&fl6.daddr)) + ttl = 1; diff --git a/queue-4.8/gro_cells-mark-napi-struct-as-not-busy-poll-candidates.patch b/queue-4.8/gro_cells-mark-napi-struct-as-not-busy-poll-candidates.patch new file mode 100644 index 00000000000..e4389915da3 --- /dev/null +++ b/queue-4.8/gro_cells-mark-napi-struct-as-not-busy-poll-candidates.patch @@ -0,0 +1,52 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Eric Dumazet +Date: Mon, 14 Nov 2016 16:28:42 -0800 +Subject: gro_cells: mark napi struct as not busy poll candidates + +From: Eric Dumazet + + +[ Upstream commit e88a2766143a27bfe6704b4493b214de4094cf29 ] + +Rolf 
Neugebauer reported very long delays at netns dismantle. + +Eric W. Biederman was kind enough to look at this problem +and noticed synchronize_net() occurring from netif_napi_del() that was +added in linux-4.5 + +Busy polling makes no sense for tunnels NAPI. +If busy poll is used for sessions over tunnels, the poller will need to +poll the physical device queue anyway. + +netif_tx_napi_add() could be used here, but function name is misleading, +and renaming it is not stable material, so set NAPI_STATE_NO_BUSY_POLL +bit directly. + +This will avoid inserting gro_cells napi structures in napi_hash[] +and avoid the problematic synchronize_net() (per possible cpu) that +Rolf reported. + +Fixes: 93d05d4a320c ("net: provide generic busy polling to all NAPI drivers") +Signed-off-by: Eric Dumazet +Reported-by: Rolf Neugebauer +Reported-by: Eric W. Biederman +Acked-by: Cong Wang +Tested-by: Rolf Neugebauer +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/gro_cells.h | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/include/net/gro_cells.h ++++ b/include/net/gro_cells.h +@@ -68,6 +68,9 @@ static inline int gro_cells_init(struct + struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + + __skb_queue_head_init(&cell->napi_skbs); ++ ++ set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); ++ + netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); + napi_enable(&cell->napi); + } diff --git a/queue-4.8/gso-reload-iph-after-pskb_may_pull.patch b/queue-4.8/gso-reload-iph-after-pskb_may_pull.patch new file mode 100644 index 00000000000..75b13c4ff76 --- /dev/null +++ b/queue-4.8/gso-reload-iph-after-pskb_may_pull.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Arnaldo Carvalho de Melo +Date: Mon, 28 Nov 2016 12:36:58 -0300 +Subject: GSO: Reload iph after pskb_may_pull + +From: Arnaldo Carvalho de Melo + + +[ Upstream commit a510887824171ad260cc4a2603396c6247fdd091 ] + +As it may get stale and lead to use after free. 
+ +Acked-by: Eric Dumazet +Cc: Alexander Duyck +Cc: Andrey Konovalov +Fixes: cbc53e08a793 ("GSO: Add GSO type for fixed IPv4 ID") +Signed-off-by: Arnaldo Carvalho de Melo +Acked-by: Alexander Duyck +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/af_inet.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -1237,7 +1237,7 @@ struct sk_buff *inet_gso_segment(struct + fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); + + /* fixed ID is invalid if DF bit is not set */ +- if (fixedid && !(iph->frag_off & htons(IP_DF))) ++ if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF))) + goto out; + } + diff --git a/queue-4.8/ip6_offload-check-segs-for-null-in-ipv6_gso_segment.patch b/queue-4.8/ip6_offload-check-segs-for-null-in-ipv6_gso_segment.patch new file mode 100644 index 00000000000..722f71c1ee2 --- /dev/null +++ b/queue-4.8/ip6_offload-check-segs-for-null-in-ipv6_gso_segment.patch @@ -0,0 +1,111 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Artem Savkov +Date: Thu, 1 Dec 2016 14:06:04 +0100 +Subject: ip6_offload: check segs for NULL in ipv6_gso_segment. 
+ +From: Artem Savkov + + +[ Upstream commit 6b6ebb6b01c873d0cfe3449e8a1219ee6e5fc022 ] + +segs needs to be checked for being NULL in ipv6_gso_segment() before calling +skb_shinfo(segs), otherwise kernel can run into a NULL-pointer dereference: + +[ 97.811262] BUG: unable to handle kernel NULL pointer dereference at 00000000000000cc +[ 97.819112] IP: [] ipv6_gso_segment+0x119/0x2f0 +[ 97.825214] PGD 0 [ 97.827047] +[ 97.828540] Oops: 0000 [#1] SMP +[ 97.831678] Modules linked in: vhost_net vhost macvtap macvlan nfsv3 rpcsec_gss_krb5 +nfsv4 dns_resolver nfs fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 +iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack +ipt_REJECT nf_reject_ipv4 tun ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter +bridge stp llc snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_intel +snd_hda_codec edac_mce_amd snd_hda_core edac_core snd_hwdep kvm_amd snd_seq kvm snd_seq_device +snd_pcm irqbypass snd_timer ppdev parport_serial snd parport_pc k10temp pcspkr soundcore parport +sp5100_tco shpchp sg wmi i2c_piix4 acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc +ip_tables xfs libcrc32c sr_mod cdrom sd_mod ata_generic pata_acpi amdkfd amd_iommu_v2 radeon +broadcom bcm_phy_lib i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops +ttm ahci serio_raw tg3 firewire_ohci libahci pata_atiixp drm ptp libata firewire_core pps_core +i2c_core crc_itu_t fjes dm_mirror dm_region_hash dm_log dm_mod +[ 97.927721] CPU: 1 PID: 3504 Comm: vhost-3495 Not tainted 4.9.0-7.el7.test.x86_64 #1 +[ 97.935457] Hardware name: AMD Snook/Snook, BIOS ESK0726A 07/26/2010 +[ 97.941806] task: ffff880129a1c080 task.stack: ffffc90001bcc000 +[ 97.947720] RIP: 0010:[] [] ipv6_gso_segment+0x119/0x2f0 +[ 97.956251] RSP: 0018:ffff88012fc43a10 EFLAGS: 00010207 +[ 97.961557] RAX: 0000000000000000 RBX: ffff8801292c8700 RCX: 0000000000000594 +[ 97.968687] RDX: 
0000000000000593 RSI: ffff880129a846c0 RDI: 0000000000240000 +[ 97.975814] RBP: ffff88012fc43a68 R08: ffff880129a8404e R09: 0000000000000000 +[ 97.982942] R10: 0000000000000000 R11: ffff880129a84076 R12: 00000020002949b3 +[ 97.990070] R13: ffff88012a580000 R14: 0000000000000000 R15: ffff88012a580000 +[ 97.997198] FS: 0000000000000000(0000) GS:ffff88012fc40000(0000) knlGS:0000000000000000 +[ 98.005280] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 98.011021] CR2: 00000000000000cc CR3: 0000000126c5d000 CR4: 00000000000006e0 +[ 98.018149] Stack: +[ 98.020157] 00000000ffffffff ffff88012fc43ac8 ffffffffa017ad0a 000000000000000e +[ 98.027584] 0000001300000000 0000000077d59998 ffff8801292c8700 00000020002949b3 +[ 98.035010] ffff88012a580000 0000000000000000 ffff88012a580000 ffff88012fc43a98 +[ 98.042437] Call Trace: +[ 98.044879] [ 98.046803] [] ? tg3_start_xmit+0x84a/0xd60 [tg3] +[ 98.053156] [] skb_mac_gso_segment+0xb0/0x130 +[ 98.059158] [] __skb_gso_segment+0x73/0x110 +[ 98.064985] [] validate_xmit_skb+0x12d/0x2b0 +[ 98.070899] [] validate_xmit_skb_list+0x42/0x70 +[ 98.077073] [] sch_direct_xmit+0xd0/0x1b0 +[ 98.082726] [] __dev_queue_xmit+0x486/0x690 +[ 98.088554] [] ? cpumask_next_and+0x35/0x50 +[ 98.094380] [] dev_queue_xmit+0x10/0x20 +[ 98.099863] [] br_dev_queue_push_xmit+0xa7/0x170 [bridge] +[ 98.106907] [] br_forward_finish+0x41/0xc0 [bridge] +[ 98.113430] [] ? nf_iterate+0x52/0x60 +[ 98.118735] [] ? nf_hook_slow+0x6b/0xc0 +[ 98.124216] [] __br_forward+0x14c/0x1e0 [bridge] +[ 98.130480] [] ? br_dev_queue_push_xmit+0x170/0x170 [bridge] +[ 98.137785] [] br_forward+0x9d/0xb0 [bridge] +[ 98.143701] [] br_handle_frame_finish+0x267/0x560 [bridge] +[ 98.150834] [] br_handle_frame+0x174/0x2f0 [bridge] +[ 98.157355] [] ? sched_clock+0x9/0x10 +[ 98.162662] [] ? sched_clock_cpu+0x72/0xa0 +[ 98.168403] [] __netif_receive_skb_core+0x1e5/0xa20 +[ 98.174926] [] ? 
timerqueue_add+0x59/0xb0 +[ 98.180580] [] __netif_receive_skb+0x18/0x60 +[ 98.186494] [] process_backlog+0x95/0x140 +[ 98.192145] [] net_rx_action+0x16d/0x380 +[ 98.197713] [] __do_softirq+0xd1/0x283 +[ 98.203106] [] do_softirq_own_stack+0x1c/0x30 +[ 98.209107] [ 98.211029] [] do_softirq+0x50/0x60 +[ 98.216166] [] netif_rx_ni+0x33/0x80 +[ 98.221386] [] tun_get_user+0x487/0x7f0 [tun] +[ 98.227388] [] tun_sendmsg+0x4b/0x60 [tun] +[ 98.233129] [] handle_tx+0x282/0x540 [vhost_net] +[ 98.239392] [] handle_tx_kick+0x15/0x20 [vhost_net] +[ 98.245916] [] vhost_worker+0x9e/0xf0 [vhost] +[ 98.251919] [] ? vhost_umem_alloc+0x40/0x40 [vhost] +[ 98.258440] [] ? do_syscall_64+0x67/0x180 +[ 98.264094] [] kthread+0xd9/0xf0 +[ 98.268965] [] ? kthread_park+0x60/0x60 +[ 98.274444] [] ret_from_fork+0x25/0x30 +[ 98.279836] Code: 8b 93 d8 00 00 00 48 2b 93 d0 00 00 00 4c 89 e6 48 89 df 66 89 93 c2 00 00 00 ff 10 48 3d 00 f0 ff ff 49 89 c2 0f 87 52 01 00 00 <41> 8b 92 cc 00 00 00 48 8b 80 d0 00 00 00 44 0f b7 74 10 06 66 +[ 98.299425] RIP [] ipv6_gso_segment+0x119/0x2f0 +[ 98.305612] RSP +[ 98.309094] CR2: 00000000000000cc +[ 98.312406] ---[ end trace 726a2c7a2d2d78d0 ]--- + +Signed-off-by: Artem Savkov +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_offload.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -98,7 +98,7 @@ static struct sk_buff *ipv6_gso_segment( + segs = ops->callbacks.gso_segment(skb, features); + } + +- if (IS_ERR(segs)) ++ if (IS_ERR_OR_NULL(segs)) + goto out; + + for (skb = segs; skb; skb = skb->next) { diff --git a/queue-4.8/ip6_tunnel-disable-caching-when-the-traffic-class-is-inherited.patch b/queue-4.8/ip6_tunnel-disable-caching-when-the-traffic-class-is-inherited.patch new file mode 100644 index 00000000000..e5b2ff22c92 --- /dev/null +++ b/queue-4.8/ip6_tunnel-disable-caching-when-the-traffic-class-is-inherited.patch @@ -0,0 +1,62 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Paolo Abeni +Date: Wed, 16 Nov 2016 16:26:46 +0100 +Subject: ip6_tunnel: disable caching when the traffic class is inherited + +From: Paolo Abeni + + +[ Upstream commit b5c2d49544e5930c96e2632a7eece3f4325a1888 ] + +If an ip6 tunnel is configured to inherit the traffic class from +the inner header, the dst_cache must be disabled or it will foul +the policy routing. + +The issue is apparently there since at least Linux-2.6.12-rc2. + +Reported-by: Liam McBirnie +Cc: Liam McBirnie +Acked-by: Hannes Frederic Sowa +Signed-off-by: Paolo Abeni +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1014,6 +1014,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, st + int mtu; + unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; + unsigned int max_headroom = psh_hlen; ++ bool use_cache = false; + int err = -1; + + /* NBMA tunnel */ +@@ -1038,7 +1039,15 @@ int ip6_tnl_xmit(struct sk_buff *skb, st + + memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); + neigh_release(neigh); +- } else if (!fl6->flowi6_mark) ++ } else if (!(t->parms.flags & ++ (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { ++ /* enable the cache only only if the routing decision does ++ * not depend on the current inner header value ++ */ ++ use_cache = true; ++ } ++ ++ if (use_cache) + dst = dst_cache_get(&t->dst_cache); + + if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) +@@ -1113,7 +1122,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, st + skb = new_skb; + } + +- if (!fl6->flowi6_mark && ndst) ++ if (use_cache && ndst) + dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); + skb_dst_set(skb, dst); + diff --git a/queue-4.8/ipv4-drop-leaf-from-suffix-pull-push-functions.patch b/queue-4.8/ipv4-drop-leaf-from-suffix-pull-push-functions.patch new file mode 100644 index 00000000000..8e5c2302ec0 --- /dev/null +++ b/queue-4.8/ipv4-drop-leaf-from-suffix-pull-push-functions.patch @@ -0,0 +1,87 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Alexander Duyck +Date: Thu, 1 Dec 2016 07:27:52 -0500 +Subject: ipv4: Drop leaf from suffix pull/push functions + +From: Alexander Duyck + + +[ Upstream commit 1a239173cccff726b60ac6a9c79ae4a1e26cfa49 ] + +It wasn't necessary to pass a leaf in when doing the suffix updates so just +drop it. Instead just pass the suffix and work with that. 
+ +Since we dropped the leaf there is no need to include that in the name so +the names are updated to node_push_suffix and node_pull_suffix. + +Finally I noticed that the logic for pulling the suffix length back +actually had some issues. Specifically it would stop prematurely if there +was a longer suffix, but it was not as long as the original suffix. I +updated the code to address that in node_pull_suffix. + +Fixes: 5405afd1a306 ("fib_trie: Add tracking value for suffix length") +Suggested-by: Robert Shearman +Signed-off-by: Alexander Duyck +Reviewed-by: Robert Shearman +Tested-by: Robert Shearman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_trie.c | 26 ++++++++++++++------------ + 1 file changed, 14 insertions(+), 12 deletions(-) + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -892,22 +892,24 @@ static struct key_vector *resize(struct + return tp; + } + +-static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l) ++static void node_pull_suffix(struct key_vector *tn, unsigned char slen) + { +- while ((tp->slen > tp->pos) && (tp->slen > l->slen)) { +- if (update_suffix(tp) > l->slen) ++ unsigned char node_slen = tn->slen; ++ ++ while ((node_slen > tn->pos) && (node_slen > slen)) { ++ slen = update_suffix(tn); ++ if (node_slen == slen) + break; +- tp = node_parent(tp); ++ ++ tn = node_parent(tn); ++ node_slen = tn->slen; + } + } + +-static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l) ++static void node_push_suffix(struct key_vector *tn, unsigned char slen) + { +- /* if this is a new leaf then tn will be NULL and we can sort +- * out parent suffix lengths as a part of trie_rebalance +- */ +- while (tn->slen < l->slen) { +- tn->slen = l->slen; ++ while (tn->slen < slen) { ++ tn->slen = slen; + tn = node_parent(tn); + } + } +@@ -1069,7 +1071,7 @@ static int fib_insert_alias(struct trie + /* if we added to the tail node then we need to update slen */ + if (l->slen < 
new->fa_slen) { + l->slen = new->fa_slen; +- leaf_push_suffix(tp, l); ++ node_push_suffix(tp, new->fa_slen); + } + + return 0; +@@ -1482,7 +1484,7 @@ static void fib_remove_alias(struct trie + + /* update the trie with the latest suffix length */ + l->slen = fa->fa_slen; +- leaf_pull_suffix(tp, l); ++ node_pull_suffix(tp, fa->fa_slen); + } + + /* Caller must hold RTNL. */ diff --git a/queue-4.8/ipv4-drop-suffix-update-from-resize-code.patch b/queue-4.8/ipv4-drop-suffix-update-from-resize-code.patch new file mode 100644 index 00000000000..fb055cfd747 --- /dev/null +++ b/queue-4.8/ipv4-drop-suffix-update-from-resize-code.patch @@ -0,0 +1,124 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Alexander Duyck +Date: Thu, 1 Dec 2016 07:27:57 -0500 +Subject: ipv4: Drop suffix update from resize code + +From: Alexander Duyck + + +[ Upstream commit a52ca62c4a6771028da9c1de934cdbcd93d54bb4 ] + +It has been reported that update_suffix can be expensive when it is called +on a large node in which most of the suffix lengths are the same. The time +required to add 200K entries had increased from around 3 seconds to almost +49 seconds. + +In order to address this we need to move the code for updating the suffix +out of resize and instead just have it handled in the cases where we are +pushing a node that increases the suffix length, or will decrease the +suffix length. + +Fixes: 5405afd1a306 ("fib_trie: Add tracking value for suffix length") +Reported-by: Robert Shearman +Signed-off-by: Alexander Duyck +Reviewed-by: Robert Shearman +Tested-by: Robert Shearman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_trie.c | 42 +++++++++++++++++++++--------------------- + 1 file changed, 21 insertions(+), 21 deletions(-) + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -681,6 +681,13 @@ static unsigned char update_suffix(struc + { + unsigned char slen = tn->pos; + unsigned long stride, i; ++ unsigned char slen_max; ++ ++ /* only vector 0 can have a suffix length greater than or equal to ++ * tn->pos + tn->bits, the second highest node will have a suffix ++ * length at most of tn->pos + tn->bits - 1 ++ */ ++ slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen); + + /* search though the list of children looking for nodes that might + * have a suffix greater than the one we currently have. This is +@@ -698,12 +705,8 @@ static unsigned char update_suffix(struc + slen = n->slen; + i &= ~(stride - 1); + +- /* if slen covers all but the last bit we can stop here +- * there will be nothing longer than that since only node +- * 0 and 1 << (bits - 1) could have that as their suffix +- * length. 
+- */ +- if ((slen + 1) >= (tn->pos + tn->bits)) ++ /* stop searching if we have hit the maximum possible value */ ++ if (slen >= slen_max) + break; + } + +@@ -875,21 +878,7 @@ static struct key_vector *resize(struct + return collapse(t, tn); + + /* update parent in case halve failed */ +- tp = node_parent(tn); +- +- /* Return if at least one deflate was run */ +- if (max_work != MAX_WORK) +- return tp; +- +- /* push the suffix length to the parent node */ +- if (tn->slen > tn->pos) { +- unsigned char slen = update_suffix(tn); +- +- if (slen > tp->slen) +- tp->slen = slen; +- } +- +- return tp; ++ return node_parent(tn); + } + + static void node_pull_suffix(struct key_vector *tn, unsigned char slen) +@@ -1030,6 +1019,7 @@ static int fib_insert_node(struct trie * + } + + /* Case 3: n is NULL, and will just insert a new leaf */ ++ node_push_suffix(tp, new->fa_slen); + NODE_INIT_PARENT(l, tp); + put_child_root(tp, key, l); + trie_rebalance(t, tp); +@@ -1472,6 +1462,8 @@ static void fib_remove_alias(struct trie + * out parent suffix lengths as a part of trie_rebalance + */ + if (hlist_empty(&l->leaf)) { ++ if (tp->slen == l->slen) ++ node_pull_suffix(tp, tp->pos); + put_child_root(tp, l->key, NULL); + node_free(l); + trie_rebalance(t, tp); +@@ -1755,6 +1747,10 @@ void fib_table_flush_external(struct fib + if (IS_TRIE(pn)) + break; + ++ /* update the suffix to address pulled leaves */ ++ if (pn->slen > pn->pos) ++ update_suffix(pn); ++ + /* resize completed node */ + pn = resize(t, pn); + cindex = get_index(pkey, pn); +@@ -1830,6 +1826,10 @@ int fib_table_flush(struct fib_table *tb + if (IS_TRIE(pn)) + break; + ++ /* update the suffix to address pulled leaves */ ++ if (pn->slen > pn->pos) ++ update_suffix(pn); ++ + /* resize completed node */ + pn = resize(t, pn); + cindex = get_index(pkey, pn); diff --git a/queue-4.8/ipv4-fix-memory-leak-in-exception-case-for-splitting-tries.patch b/queue-4.8/ipv4-fix-memory-leak-in-exception-case-for-splitting-tries.patch new file 
mode 100644 index 00000000000..6530af9236d --- /dev/null +++ b/queue-4.8/ipv4-fix-memory-leak-in-exception-case-for-splitting-tries.patch @@ -0,0 +1,37 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Alexander Duyck +Date: Tue, 15 Nov 2016 05:46:12 -0500 +Subject: ipv4: Fix memory leak in exception case for splitting tries + +From: Alexander Duyck + + +[ Upstream commit 3114cdfe66c156345b0ae34e2990472f277e0c1b ] + +Fix a small memory leak that can occur where we leak a fib_alias in the +event of us not being able to insert it into the local table. + +Fixes: 0ddcf43d5d4a0 ("ipv4: FIB Local/MAIN table collapse") +Reported-by: Eric Dumazet +Signed-off-by: Alexander Duyck +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_trie.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -1713,8 +1713,10 @@ struct fib_table *fib_trie_unmerge(struc + local_l = fib_find_node(lt, &local_tp, l->key); + + if (fib_insert_alias(lt, local_tp, local_l, new_fa, +- NULL, l->key)) ++ NULL, l->key)) { ++ kmem_cache_free(fn_alias_kmem, new_fa); + goto out; ++ } + } + + /* stop loop if key wrapped back to 0 */ diff --git a/queue-4.8/ipv4-restore-fib_trie_flush_external-function-and-fix-call-ordering.patch b/queue-4.8/ipv4-restore-fib_trie_flush_external-function-and-fix-call-ordering.patch new file mode 100644 index 00000000000..7f531e59c18 --- /dev/null +++ b/queue-4.8/ipv4-restore-fib_trie_flush_external-function-and-fix-call-ordering.patch @@ -0,0 +1,70 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Alexander Duyck +Date: Tue, 15 Nov 2016 05:46:06 -0500 +Subject: ipv4: Restore fib_trie_flush_external function and fix call ordering + +From: Alexander Duyck + + +[ Upstream commit 3b7093346b326e5d3590c7d49f6aefe6fa5b2c9a, the FIB offload + removal didn't occur in 4.8 so that part of this patch isn't here. However + we still need to fib_unmerge() bits. 
] + +The patch that removed the FIB offload infrastructure was a bit too +aggressive and also removed code needed to clean up us splitting the table +if additional rules were added. Specifically the function +fib_trie_flush_external was called at the end of a new rule being added to +flush the foreign trie entries from the main trie. + +I updated the code so that we only call fib_trie_flush_external on the main +table so that we flush the entries for local from main. This way we don't +call it for every rule change which is what was happening previously. + +Fixes: 347e3b28c1ba2 ("switchdev: remove FIB offload infrastructure") +Reported-by: Eric Dumazet +Cc: Jiri Pirko +Signed-off-by: Alexander Duyck +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_frontend.c | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -157,7 +157,7 @@ static void fib_replace_table(struct net + + int fib_unmerge(struct net *net) + { +- struct fib_table *old, *new; ++ struct fib_table *old, *new, *main_table; + + /* attempt to fetch local table if it has been allocated */ + old = fib_get_table(net, RT_TABLE_LOCAL); +@@ -168,11 +168,21 @@ int fib_unmerge(struct net *net) + if (!new) + return -ENOMEM; + ++ /* table is already unmerged */ ++ if (new == old) ++ return 0; ++ + /* replace merged table with clean table */ +- if (new != old) { +- fib_replace_table(net, old, new); +- fib_free_table(old); +- } ++ fib_replace_table(net, old, new); ++ fib_free_table(old); ++ ++ /* attempt to fetch main table if it has been allocated */ ++ main_table = fib_get_table(net, RT_TABLE_MAIN); ++ if (!main_table) ++ return 0; ++ ++ /* flush local entries from main table */ ++ fib_table_flush_external(main_table); + + return 0; + } diff --git a/queue-4.8/ipv6-bump-genid-when-the-ifa_f_tentative-flag-is-clear.patch 
b/queue-4.8/ipv6-bump-genid-when-the-ifa_f_tentative-flag-is-clear.patch new file mode 100644 index 00000000000..8a467d38263 --- /dev/null +++ b/queue-4.8/ipv6-bump-genid-when-the-ifa_f_tentative-flag-is-clear.patch @@ -0,0 +1,124 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Paolo Abeni +Date: Tue, 22 Nov 2016 16:57:40 +0100 +Subject: ipv6: bump genid when the IFA_F_TENTATIVE flag is clear + +From: Paolo Abeni + + +[ Upstream commit 764d3be6e415b40056834bfd29b994dc3f837606 ] + +When an ipv6 address has the tentative flag set, it can't be +used as source for egress traffic, while the associated route, +if any, can be looked up and even stored into some dst_cache. + +In the latter scenario, the source ipv6 address selected and +stored in the cache is most probably wrong (e.g. with +link-local scope) and the entity using the dst_cache will +experience lack of ipv6 connectivity until said cache is +cleared or invalidated. + +Overall this may cause lack of connectivity over most IPv6 tunnels +(comprising geneve and vxlan), if the first egress packet reaches +the tunnel before the DaD is completed for the used ipv6 +address. + +This patch bumps a new genid after that the IFA_F_TENTATIVE flag +is cleared, so that dst_cache will be invalidated on +next lookup and ipv6 connectivity restored. + +Fixes: 0c1d70af924b ("net: use dst_cache for vxlan device") +Fixes: 468dfffcd762 ("geneve: add dst caching support") +Acked-by: Hannes Frederic Sowa +Signed-off-by: Paolo Abeni +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -163,7 +163,7 @@ static struct rt6_info *addrconf_get_pre + + static void addrconf_dad_start(struct inet6_ifaddr *ifp); + static void addrconf_dad_work(struct work_struct *w); +-static void addrconf_dad_completed(struct inet6_ifaddr *ifp); ++static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id); + static void addrconf_dad_run(struct inet6_dev *idev); + static void addrconf_rs_timer(unsigned long data); + static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); +@@ -2893,6 +2893,7 @@ static void add_addr(struct inet6_dev *i + spin_lock_bh(&ifp->lock); + ifp->flags &= ~IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); ++ rt_genid_bump_ipv6(dev_net(idev->dev)); + ipv6_ifa_notify(RTM_NEWADDR, ifp); + in6_ifa_put(ifp); + } +@@ -3736,7 +3737,7 @@ static void addrconf_dad_begin(struct in + { + struct inet6_dev *idev = ifp->idev; + struct net_device *dev = idev->dev; +- bool notify = false; ++ bool bump_id, notify = false; + + addrconf_join_solict(dev, &ifp->addr); + +@@ -3751,11 +3752,12 @@ static void addrconf_dad_begin(struct in + idev->cnf.accept_dad < 1 || + !(ifp->flags&IFA_F_TENTATIVE) || + ifp->flags & IFA_F_NODAD) { ++ bump_id = ifp->flags & IFA_F_TENTATIVE; + ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); + spin_unlock(&ifp->lock); + read_unlock_bh(&idev->lock); + +- addrconf_dad_completed(ifp); ++ addrconf_dad_completed(ifp, bump_id); + return; + } + +@@ -3815,8 +3817,8 @@ static void addrconf_dad_work(struct wor + struct inet6_ifaddr, + dad_work); + struct inet6_dev *idev = ifp->idev; ++ bool bump_id, disable_ipv6 = false; + struct in6_addr mcaddr; +- bool disable_ipv6 = false; + + enum { + DAD_PROCESS, +@@ -3886,11 +3888,12 @@ static void addrconf_dad_work(struct wor + * DAD was successful + */ + ++ bump_id = 
ifp->flags & IFA_F_TENTATIVE; + ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); + spin_unlock(&ifp->lock); + write_unlock_bh(&idev->lock); + +- addrconf_dad_completed(ifp); ++ addrconf_dad_completed(ifp, bump_id); + + goto out; + } +@@ -3927,7 +3930,7 @@ static bool ipv6_lonely_lladdr(struct in + return true; + } + +-static void addrconf_dad_completed(struct inet6_ifaddr *ifp) ++static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) + { + struct net_device *dev = ifp->idev->dev; + struct in6_addr lladdr; +@@ -3978,6 +3981,9 @@ static void addrconf_dad_completed(struc + spin_unlock(&ifp->lock); + write_unlock_bh(&ifp->idev->lock); + } ++ ++ if (bump_id) ++ rt_genid_bump_ipv6(dev_net(dev)); + } + + static void addrconf_dad_run(struct inet6_dev *idev) diff --git a/queue-4.8/l2tp-fix-racy-sock_zapped-flag-check-in-l2tp_ip-6-_bind.patch b/queue-4.8/l2tp-fix-racy-sock_zapped-flag-check-in-l2tp_ip-6-_bind.patch new file mode 100644 index 00000000000..774e3aab914 --- /dev/null +++ b/queue-4.8/l2tp-fix-racy-sock_zapped-flag-check-in-l2tp_ip-6-_bind.patch @@ -0,0 +1,166 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Guillaume Nault +Date: Fri, 18 Nov 2016 22:13:00 +0100 +Subject: l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind() + +From: Guillaume Nault + + +[ Upstream commit 32c231164b762dddefa13af5a0101032c70b50ef ] + +Lock socket before checking the SOCK_ZAPPED flag in l2tp_ip6_bind(). +Without lock, a concurrent call could modify the socket flags between +the sock_flag(sk, SOCK_ZAPPED) test and the lock_sock() call. This way, +a socket could be inserted twice in l2tp_ip6_bind_table. Releasing it +would then leave a stale pointer there, generating use-after-free +errors when walking through the list or modifying adjacent entries. 
+ +BUG: KASAN: use-after-free in l2tp_ip6_close+0x22e/0x290 at addr ffff8800081b0ed8 +Write of size 8 by task syz-executor/10987 +CPU: 0 PID: 10987 Comm: syz-executor Not tainted 4.8.0+ #39 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 + ffff880031d97838 ffffffff829f835b ffff88001b5a1640 ffff8800081b0ec0 + ffff8800081b15a0 ffff8800081b6d20 ffff880031d97860 ffffffff8174d3cc + ffff880031d978f0 ffff8800081b0e80 ffff88001b5a1640 ffff880031d978e0 +Call Trace: + [] dump_stack+0xb3/0x118 lib/dump_stack.c:15 + [] kasan_object_err+0x1c/0x70 mm/kasan/report.c:156 + [< inline >] print_address_description mm/kasan/report.c:194 + [] kasan_report_error+0x1f6/0x4d0 mm/kasan/report.c:283 + [< inline >] kasan_report mm/kasan/report.c:303 + [] __asan_report_store8_noabort+0x3e/0x40 mm/kasan/report.c:329 + [< inline >] __write_once_size ./include/linux/compiler.h:249 + [< inline >] __hlist_del ./include/linux/list.h:622 + [< inline >] hlist_del_init ./include/linux/list.h:637 + [] l2tp_ip6_close+0x22e/0x290 net/l2tp/l2tp_ip6.c:239 + [] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415 + [] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422 + [] sock_release+0x8d/0x1d0 net/socket.c:570 + [] sock_close+0x16/0x20 net/socket.c:1017 + [] __fput+0x28c/0x780 fs/file_table.c:208 + [] ____fput+0x15/0x20 fs/file_table.c:244 + [] task_work_run+0xf9/0x170 + [] do_exit+0x85e/0x2a00 + [] do_group_exit+0x108/0x330 + [] get_signal+0x617/0x17a0 kernel/signal.c:2307 + [] do_signal+0x7f/0x18f0 + [] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156 + [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 + [] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259 + [] entry_SYSCALL_64_fastpath+0xc4/0xc6 +Object at ffff8800081b0ec0, in cache L2TP/IPv6 size: 1448 +Allocated: +PID = 10987 + [ 1116.897025] [] save_stack_trace+0x16/0x20 + [ 1116.897025] [] save_stack+0x46/0xd0 + [ 1116.897025] [] 
kasan_kmalloc+0xad/0xe0 + [ 1116.897025] [] kasan_slab_alloc+0x12/0x20 + [ 1116.897025] [< inline >] slab_post_alloc_hook mm/slab.h:417 + [ 1116.897025] [< inline >] slab_alloc_node mm/slub.c:2708 + [ 1116.897025] [< inline >] slab_alloc mm/slub.c:2716 + [ 1116.897025] [] kmem_cache_alloc+0xc8/0x2b0 mm/slub.c:2721 + [ 1116.897025] [] sk_prot_alloc+0x69/0x2b0 net/core/sock.c:1326 + [ 1116.897025] [] sk_alloc+0x38/0xae0 net/core/sock.c:1388 + [ 1116.897025] [] inet6_create+0x2d7/0x1000 net/ipv6/af_inet6.c:182 + [ 1116.897025] [] __sock_create+0x37b/0x640 net/socket.c:1153 + [ 1116.897025] [< inline >] sock_create net/socket.c:1193 + [ 1116.897025] [< inline >] SYSC_socket net/socket.c:1223 + [ 1116.897025] [] SyS_socket+0xef/0x1b0 net/socket.c:1203 + [ 1116.897025] [] entry_SYSCALL_64_fastpath+0x23/0xc6 +Freed: +PID = 10987 + [ 1116.897025] [] save_stack_trace+0x16/0x20 + [ 1116.897025] [] save_stack+0x46/0xd0 + [ 1116.897025] [] kasan_slab_free+0x71/0xb0 + [ 1116.897025] [< inline >] slab_free_hook mm/slub.c:1352 + [ 1116.897025] [< inline >] slab_free_freelist_hook mm/slub.c:1374 + [ 1116.897025] [< inline >] slab_free mm/slub.c:2951 + [ 1116.897025] [] kmem_cache_free+0xc8/0x330 mm/slub.c:2973 + [ 1116.897025] [< inline >] sk_prot_free net/core/sock.c:1369 + [ 1116.897025] [] __sk_destruct+0x32b/0x4f0 net/core/sock.c:1444 + [ 1116.897025] [] sk_destruct+0x44/0x80 net/core/sock.c:1452 + [ 1116.897025] [] __sk_free+0x53/0x220 net/core/sock.c:1460 + [ 1116.897025] [] sk_free+0x23/0x30 net/core/sock.c:1471 + [ 1116.897025] [] sk_common_release+0x28c/0x3e0 ./include/net/sock.h:1589 + [ 1116.897025] [] l2tp_ip6_close+0x1fe/0x290 net/l2tp/l2tp_ip6.c:243 + [ 1116.897025] [] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415 + [ 1116.897025] [] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422 + [ 1116.897025] [] sock_release+0x8d/0x1d0 net/socket.c:570 + [ 1116.897025] [] sock_close+0x16/0x20 net/socket.c:1017 + [ 1116.897025] [] __fput+0x28c/0x780 fs/file_table.c:208 + [ 
1116.897025] [] ____fput+0x15/0x20 fs/file_table.c:244 + [ 1116.897025] [] task_work_run+0xf9/0x170 + [ 1116.897025] [] do_exit+0x85e/0x2a00 + [ 1116.897025] [] do_group_exit+0x108/0x330 + [ 1116.897025] [] get_signal+0x617/0x17a0 kernel/signal.c:2307 + [ 1116.897025] [] do_signal+0x7f/0x18f0 + [ 1116.897025] [] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156 + [ 1116.897025] [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 + [ 1116.897025] [] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259 + [ 1116.897025] [] entry_SYSCALL_64_fastpath+0xc4/0xc6 +Memory state around the buggy address: + ffff8800081b0d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800081b0e00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +>ffff8800081b0e80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb + ^ + ffff8800081b0f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8800081b0f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + +================================================================== + +The same issue exists with l2tp_ip_bind() and l2tp_ip_bind_table. + +Fixes: c51ce49735c1 ("l2tp: fix oops in L2TP IP sockets for connect() AF_UNSPEC case") +Reported-by: Baozeng Ding +Reported-by: Andrey Konovalov +Tested-by: Baozeng Ding +Signed-off-by: Guillaume Nault +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ip.c | 5 +++-- + net/l2tp/l2tp_ip6.c | 5 +++-- + 2 files changed, 6 insertions(+), 4 deletions(-) + +--- a/net/l2tp/l2tp_ip.c ++++ b/net/l2tp/l2tp_ip.c +@@ -251,8 +251,6 @@ static int l2tp_ip_bind(struct sock *sk, + int ret; + int chk_addr_ret; + +- if (!sock_flag(sk, SOCK_ZAPPED)) +- return -EINVAL; + if (addr_len < sizeof(struct sockaddr_l2tpip)) + return -EINVAL; + if (addr->l2tp_family != AF_INET) +@@ -267,6 +265,9 @@ static int l2tp_ip_bind(struct sock *sk, + read_unlock_bh(&l2tp_ip_lock); + + lock_sock(sk); ++ if (!sock_flag(sk, SOCK_ZAPPED)) ++ goto out; ++ + if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) + goto out; + +--- a/net/l2tp/l2tp_ip6.c ++++ b/net/l2tp/l2tp_ip6.c +@@ -269,8 +269,6 @@ static int l2tp_ip6_bind(struct sock *sk + int addr_type; + int err; + +- if (!sock_flag(sk, SOCK_ZAPPED)) +- return -EINVAL; + if (addr->l2tp_family != AF_INET6) + return -EINVAL; + if (addr_len < sizeof(*addr)) +@@ -296,6 +294,9 @@ static int l2tp_ip6_bind(struct sock *sk + lock_sock(sk); + + err = -EINVAL; ++ if (!sock_flag(sk, SOCK_ZAPPED)) ++ goto out_unlock; ++ + if (sk->sk_state != TCP_CLOSE) + goto out_unlock; + diff --git a/queue-4.8/net-avoid-signed-overflows-for-so_-snd-rcv-bufforce.patch b/queue-4.8/net-avoid-signed-overflows-for-so_-snd-rcv-bufforce.patch new file mode 100644 index 00000000000..a3fb58d0f3e --- /dev/null +++ b/queue-4.8/net-avoid-signed-overflows-for-so_-snd-rcv-bufforce.patch @@ -0,0 +1,50 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Eric Dumazet +Date: Fri, 2 Dec 2016 09:44:53 -0800 +Subject: net: avoid signed overflows for SO_{SND|RCV}BUFFORCE + +From: Eric Dumazet + + +[ Upstream commit b98b0bc8c431e3ceb4b26b0dfc8db509518fb290 ] + +CAP_NET_ADMIN users should not be allowed to set negative +sk_sndbuf or sk_rcvbuf values, as it can lead to various memory +corruptions, crashes, OOM... 
+ +Note that before commit 82981930125a ("net: cleanups in +sock_setsockopt()"), the bug was even more serious, since SO_SNDBUF +and SO_RCVBUF were vulnerable. + +This needs to be backported to all known linux kernels. + +Again, many thanks to syzkaller team for discovering this gem. + +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -715,7 +715,7 @@ int sock_setsockopt(struct socket *sock, + val = min_t(u32, val, sysctl_wmem_max); + set_sndbuf: + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; +- sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF); ++ sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); + /* Wake up sending tasks if we upped the value. */ + sk->sk_write_space(sk); + break; +@@ -751,7 +751,7 @@ set_rcvbuf: + * returning the value we actually used in getsockopt + * is the most desirable behavior. + */ +- sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF); ++ sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); + break; + + case SO_RCVBUFFORCE: diff --git a/queue-4.8/net-bcmgenet-utilize-correct-struct-device-for-all-dma-operations.patch b/queue-4.8/net-bcmgenet-utilize-correct-struct-device-for-all-dma-operations.patch new file mode 100644 index 00000000000..3529e6bf0b5 --- /dev/null +++ b/queue-4.8/net-bcmgenet-utilize-correct-struct-device-for-all-dma-operations.patch @@ -0,0 +1,67 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Florian Fainelli +Date: Thu, 1 Dec 2016 09:45:45 -0800 +Subject: net: bcmgenet: Utilize correct struct device for all DMA operations + +From: Florian Fainelli + + +[ Upstream commit 8c4799ac799665065f9bf1364fd71bf4f7dc6a4a ] + +__bcmgenet_tx_reclaim() and bcmgenet_free_rx_buffers() are not using the +same struct device during unmap that was used for the map operation, +which makes DMA-API debugging warn about it. 
Fix this by always using +&priv->pdev->dev throughout the driver, using an identical device +reference for all map/unmap calls. + +Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c +@@ -1172,6 +1172,7 @@ static unsigned int __bcmgenet_tx_reclai + struct bcmgenet_tx_ring *ring) + { + struct bcmgenet_priv *priv = netdev_priv(dev); ++ struct device *kdev = &priv->pdev->dev; + struct enet_cb *tx_cb_ptr; + struct netdev_queue *txq; + unsigned int pkts_compl = 0; +@@ -1199,13 +1200,13 @@ static unsigned int __bcmgenet_tx_reclai + if (tx_cb_ptr->skb) { + pkts_compl++; + bytes_compl += GENET_CB(tx_cb_ptr->skb)->bytes_sent; +- dma_unmap_single(&dev->dev, ++ dma_unmap_single(kdev, + dma_unmap_addr(tx_cb_ptr, dma_addr), + dma_unmap_len(tx_cb_ptr, dma_len), + DMA_TO_DEVICE); + bcmgenet_free_cb(tx_cb_ptr); + } else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) { +- dma_unmap_page(&dev->dev, ++ dma_unmap_page(kdev, + dma_unmap_addr(tx_cb_ptr, dma_addr), + dma_unmap_len(tx_cb_ptr, dma_len), + DMA_TO_DEVICE); +@@ -1775,6 +1776,7 @@ static int bcmgenet_alloc_rx_buffers(str + + static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv) + { ++ struct device *kdev = &priv->pdev->dev; + struct enet_cb *cb; + int i; + +@@ -1782,7 +1784,7 @@ static void bcmgenet_free_rx_buffers(str + cb = &priv->rx_cbs[i]; + + if (dma_unmap_addr(cb, dma_addr)) { +- dma_unmap_single(&priv->dev->dev, ++ dma_unmap_single(kdev, + dma_unmap_addr(cb, dma_addr), + priv->rx_buf_len, DMA_FROM_DEVICE); + dma_unmap_addr_set(cb, dma_addr, 0); diff --git a/queue-4.8/net-check-dead-netns-for-peernet2id_alloc.patch b/queue-4.8/net-check-dead-netns-for-peernet2id_alloc.patch new file mode 
100644 index 00000000000..db6f8bc2d1e --- /dev/null +++ b/queue-4.8/net-check-dead-netns-for-peernet2id_alloc.patch @@ -0,0 +1,54 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: WANG Cong +Date: Wed, 16 Nov 2016 10:27:02 -0800 +Subject: net: check dead netns for peernet2id_alloc() + +From: WANG Cong + + +[ Upstream commit cfc44a4d147ea605d66ccb917cc24467d15ff867 ] + +Andrei reports we still allocate netns ID from idr after we destroy +it in cleanup_net(). + +cleanup_net(): + ... + idr_destroy(&net->netns_ids); + ... + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + -> rollback_registered_many() + -> rtmsg_ifinfo_build_skb() + -> rtnl_fill_ifinfo() + -> peernet2id_alloc() + +After that point we should not even access net->netns_ids, we +should check the death of the current netns as early as we can in +peernet2id_alloc(). + +For net-next we can consider to avoid sending rtmsg totally, +it is a good optimization for netns teardown path. + +Fixes: 0c7aecd4bde4 ("netns: add rtnl cmd to add and get peer netns ids") +Reported-by: Andrei Vagin +Cc: Nicolas Dichtel +Signed-off-by: Cong Wang +Acked-by: Andrei Vagin +Signed-off-by: Nicolas Dichtel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/net_namespace.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/core/net_namespace.c ++++ b/net/core/net_namespace.c +@@ -217,6 +217,8 @@ int peernet2id_alloc(struct net *net, st + bool alloc; + int id; + ++ if (atomic_read(&net->count) == 0) ++ return NETNSA_NSID_NOT_ASSIGNED; + spin_lock_irqsave(&net->nsid_lock, flags); + alloc = atomic_read(&peer->count) == 0 ? 
false : true; + id = __peernet2id_alloc(net, peer, &alloc); diff --git a/queue-4.8/net-dccp-fix-use-after-free-in-dccp_invalid_packet.patch b/queue-4.8/net-dccp-fix-use-after-free-in-dccp_invalid_packet.patch new file mode 100644 index 00000000000..56d784a038f --- /dev/null +++ b/queue-4.8/net-dccp-fix-use-after-free-in-dccp_invalid_packet.patch @@ -0,0 +1,58 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Eric Dumazet +Date: Mon, 28 Nov 2016 06:26:49 -0800 +Subject: net/dccp: fix use-after-free in dccp_invalid_packet + +From: Eric Dumazet + + +[ Upstream commit 648f0c28df282636c0c8a7a19ca3ce5fc80a39c3 ] + +pskb_may_pull() can reallocate skb->head, we need to reload dh pointer +in dccp_invalid_packet() or risk use after free. + +Bug found by Andrey Konovalov using syzkaller. + +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv4.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -700,6 +700,7 @@ int dccp_invalid_packet(struct sk_buff * + { + const struct dccp_hdr *dh; + unsigned int cscov; ++ u8 dccph_doff; + + if (skb->pkt_type != PACKET_HOST) + return 1; +@@ -721,18 +722,19 @@ int dccp_invalid_packet(struct sk_buff * + /* + * If P.Data Offset is too small for packet type, drop packet and return + */ +- if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { +- DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); ++ dccph_doff = dh->dccph_doff; ++ if (dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { ++ DCCP_WARN("P.Data Offset(%u) too small\n", dccph_doff); + return 1; + } + /* + * If P.Data Offset is too too large for packet, drop packet and return + */ +- if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { +- DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); ++ if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) { ++ DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff); + 
return 1; + } +- ++ dh = dccp_hdr(skb); + /* + * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet + * has short sequence numbers), drop packet and return diff --git a/queue-4.8/net-dsa-b53-fix-vlan-usage-and-how-we-treat-cpu-port.patch b/queue-4.8/net-dsa-b53-fix-vlan-usage-and-how-we-treat-cpu-port.patch new file mode 100644 index 00000000000..5a6fcc514fc --- /dev/null +++ b/queue-4.8/net-dsa-b53-fix-vlan-usage-and-how-we-treat-cpu-port.patch @@ -0,0 +1,101 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Florian Fainelli +Date: Tue, 15 Nov 2016 15:58:15 -0800 +Subject: net: dsa: b53: Fix VLAN usage and how we treat CPU port + +From: Florian Fainelli + + +[ Upstream commit e47112d9d6009bf6b7438cedc0270316d6b0370d ] + +We currently have a fundamental problem in how we treat the CPU port and +its VLAN membership. As soon as a second VLAN is configured to be +untagged, the CPU automatically becomes untagged for that VLAN as well, +and yet, we don't gracefully make sure that the CPU becomes tagged in +the other VLANs it could be a member of. This results in only one VLAN +being effectively usable from the CPU's perspective. + +Instead of having some pretty complex logic which tries to maintain the +CPU port's default VLAN and its untagged properties, just do something +very simple which consists in neither altering the CPU port's PVID +settings, nor its untagged settings: + +- whenever a VLAN is added, the CPU is automatically a member of this + VLAN group, as a tagged member +- PVID settings for downstream ports do not alter the CPU port's PVID + since it now is part of all VLANs in the system + +This means that a typical example where e.g: LAN ports are in VLAN1, and +WAN port is in VLAN2, now require having two VLAN interfaces for the +host to properly terminate and send traffic from/to. + +Fixes: Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") +Reported-by: Hartmut Knaack +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/b53/b53_common.c | 16 ++++------------ + 1 file changed, 4 insertions(+), 12 deletions(-) + +--- a/drivers/net/dsa/b53/b53_common.c ++++ b/drivers/net/dsa/b53/b53_common.c +@@ -904,9 +904,10 @@ static void b53_vlan_add(struct dsa_swit + + vl->members |= BIT(port) | BIT(cpu_port); + if (untagged) +- vl->untag |= BIT(port) | BIT(cpu_port); ++ vl->untag |= BIT(port); + else +- vl->untag &= ~(BIT(port) | BIT(cpu_port)); ++ vl->untag &= ~BIT(port); ++ vl->untag &= ~BIT(cpu_port); + + b53_set_vlan_entry(dev, vid, vl); + b53_fast_age_vlan(dev, vid); +@@ -915,8 +916,6 @@ static void b53_vlan_add(struct dsa_swit + if (pvid) { + b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), + vlan->vid_end); +- b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port), +- vlan->vid_end); + b53_fast_age_vlan(dev, vid); + } + } +@@ -926,7 +925,6 @@ static int b53_vlan_del(struct dsa_switc + { + struct b53_device *dev = ds_to_priv(ds); + bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; +- unsigned int cpu_port = dev->cpu_port; + struct b53_vlan *vl; + u16 vid; + u16 pvid; +@@ -939,8 +937,6 @@ static int b53_vlan_del(struct dsa_switc + b53_get_vlan_entry(dev, vid, vl); + + vl->members &= ~BIT(port); +- if ((vl->members & BIT(cpu_port)) == BIT(cpu_port)) +- vl->members = 0; + + if (pvid == vid) { + if (is5325(dev) || is5365(dev)) +@@ -949,18 +945,14 @@ static int b53_vlan_del(struct dsa_switc + pvid = 0; + } + +- if (untagged) { ++ if (untagged) + vl->untag &= ~(BIT(port)); +- if ((vl->untag & BIT(cpu_port)) == BIT(cpu_port)) +- vl->untag = 0; +- } + + b53_set_vlan_entry(dev, vid, vl); + b53_fast_age_vlan(dev, vid); + } + + b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), pvid); +- b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port), pvid); + b53_fast_age_vlan(dev, pvid); + + return 0; diff --git a/queue-4.8/net-dsa-bcm_sf2-ensure-we-re-negotiate-eee-during-after-link-change.patch 
b/queue-4.8/net-dsa-bcm_sf2-ensure-we-re-negotiate-eee-during-after-link-change.patch new file mode 100644 index 00000000000..55ad99ea7ce --- /dev/null +++ b/queue-4.8/net-dsa-bcm_sf2-ensure-we-re-negotiate-eee-during-after-link-change.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Florian Fainelli +Date: Tue, 22 Nov 2016 11:40:58 -0800 +Subject: net: dsa: bcm_sf2: Ensure we re-negotiate EEE during after link change + +From: Florian Fainelli + + +[ Upstream commit 76da8706d90d8641eeb9b8e579942ed80b6c0880 ] + +In case the link change and EEE is enabled or disabled, always try to +re-negotiate this with the link partner. + +Fixes: 450b05c15f9c ("net: dsa: bcm_sf2: add support for controlling EEE") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -1167,6 +1167,7 @@ static void bcm_sf2_sw_adjust_link(struc + struct phy_device *phydev) + { + struct bcm_sf2_priv *priv = ds_to_priv(ds); ++ struct ethtool_eee *p = &priv->port_sts[port].eee; + u32 id_mode_dis = 0, port_mode; + const char *str = NULL; + u32 reg; +@@ -1241,6 +1242,9 @@ force_link: + reg |= DUPLX_MODE; + + core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port)); ++ ++ if (!phydev->is_pseudo_fixed_link) ++ p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev); + } + + static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, diff --git a/queue-4.8/net-dsa-fix-unbalanced-dsa_switch_tree-reference-counting.patch b/queue-4.8/net-dsa-fix-unbalanced-dsa_switch_tree-reference-counting.patch new file mode 100644 index 00000000000..7a97cb3381d --- /dev/null +++ b/queue-4.8/net-dsa-fix-unbalanced-dsa_switch_tree-reference-counting.patch @@ -0,0 +1,49 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Nikita Yushchenko +Date: Mon, 28 Nov 2016 09:48:48 +0300 +Subject: net: dsa: fix 
unbalanced dsa_switch_tree reference counting + +From: Nikita Yushchenko + + +[ Upstream commit 7a99cd6e213685b78118382e6a8fed506c82ccb2 ] + +_dsa_register_switch() gets a dsa_switch_tree object either via +dsa_get_dst() or via dsa_add_dst(). Former path does not increase kref +in returned object (resulting into caller not owning a reference), +while latter path does create a new object (resulting into caller owning +a reference). + +The rest of _dsa_register_switch() assumes that it owns a reference, and +calls dsa_put_dst(). + +This causes a memory breakage if first switch in the tree initialized +successfully, but second failed to initialize. In particular, freed +dsa_switch_tree object is left referenced by switch that was initialized, +and later access to sysfs attributes of that switch cause OOPS. + +To fix, need to add kref_get() call to dsa_get_dst(). + +Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation") +Signed-off-by: Nikita Yushchenko +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/dsa2.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/dsa/dsa2.c ++++ b/net/dsa/dsa2.c +@@ -28,8 +28,10 @@ static struct dsa_switch_tree *dsa_get_d + struct dsa_switch_tree *dst; + + list_for_each_entry(dst, &dsa_switch_trees, list) +- if (dst->tree == tree) ++ if (dst->tree == tree) { ++ kref_get(&dst->refcount); + return dst; ++ } + return NULL; + } + diff --git a/queue-4.8/net-macb-fix-the-rx-queue-reset-in-macb_rx.patch b/queue-4.8/net-macb-fix-the-rx-queue-reset-in-macb_rx.patch new file mode 100644 index 00000000000..c51b58b7691 --- /dev/null +++ b/queue-4.8/net-macb-fix-the-rx-queue-reset-in-macb_rx.patch @@ -0,0 +1,56 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Cyrille Pitchen +Date: Mon, 28 Nov 2016 14:40:55 +0100 +Subject: net: macb: fix the RX queue reset in macb_rx() + +From: Cyrille Pitchen + + +[ Upstream commit a0b44eea372b449ef9744fb1d90491cc063289b8 ] + +On macb only (not gem), when a RX queue corruption was detected from +macb_rx(), the RX queue was reset: during this process the RX ring +buffer descriptor was initialized by macb_init_rx_ring() but we forgot +to also set bp->rx_tail to 0. + +Indeed, when processing the received frames, bp->rx_tail provides the +macb driver with the index in the RX ring buffer of the next buffer to +process. So when the whole ring buffer is reset we must also reset +bp->rx_tail so the driver is synchronized again with the hardware. + +Since macb_init_rx_ring() is called from many locations, currently from +macb_rx() and macb_init_rings(), we'd rather add the "bp->rx_tail = 0;" +line inside macb_init_rx_ring() than add the very same line after each +call of this function. + +Without this fix, the rx queue is not reset properly to recover from +queue corruption and connection drop may occur. 
+ +Signed-off-by: Cyrille Pitchen +Fixes: 9ba723b081a2 ("net: macb: remove BUG_ON() and reset the queue to handle RX errors") +Acked-by: Nicolas Ferre +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cadence/macb.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/ethernet/cadence/macb.c ++++ b/drivers/net/ethernet/cadence/macb.c +@@ -959,6 +959,7 @@ static inline void macb_init_rx_ring(str + addr += bp->rx_buffer_size; + } + bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP); ++ bp->rx_tail = 0; + } + + static int macb_rx(struct macb *bp, int budget) +@@ -1597,8 +1598,6 @@ static void macb_init_rings(struct macb + bp->queues[0].tx_head = 0; + bp->queues[0].tx_tail = 0; + bp->queues[0].tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); +- +- bp->rx_tail = 0; + } + + static void macb_reset_hw(struct macb *bp) diff --git a/queue-4.8/net-ping-check-minimum-size-on-icmp-header-length.patch b/queue-4.8/net-ping-check-minimum-size-on-icmp-header-length.patch new file mode 100644 index 00000000000..4dd03194089 --- /dev/null +++ b/queue-4.8/net-ping-check-minimum-size-on-icmp-header-length.patch @@ -0,0 +1,72 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Kees Cook +Date: Mon, 5 Dec 2016 10:34:38 -0800 +Subject: net: ping: check minimum size on ICMP header length + +From: Kees Cook + + +[ Upstream commit 0eab121ef8750a5c8637d51534d5e9143fb0633f ] + +Prior to commit c0371da6047a ("put iov_iter into msghdr") in v3.19, there +was no check that the iovec contained enough bytes for an ICMP header, +and the read loop would walk across neighboring stack contents. Since the +iov_iter conversion, bad arguments are noticed, but the returned error is +EFAULT. Returning EINVAL is a clearer error and also solves the problem +prior to v3.19. 
+ +This was found using trinity with KASAN on v3.18: + +BUG: KASAN: stack-out-of-bounds in memcpy_fromiovec+0x60/0x114 at addr ffffffc071077da0 +Read of size 8 by task trinity-c2/9623 +page:ffffffbe034b9a08 count:0 mapcount:0 mapping: (null) index:0x0 +flags: 0x0() +page dumped because: kasan: bad access detected +CPU: 0 PID: 9623 Comm: trinity-c2 Tainted: G BU 3.18.0-dirty #15 +Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT) +Call trace: +[] dump_backtrace+0x0/0x1ac arch/arm64/kernel/traps.c:90 +[] show_stack+0x10/0x1c arch/arm64/kernel/traps.c:171 +[< inline >] __dump_stack lib/dump_stack.c:15 +[] dump_stack+0x7c/0xd0 lib/dump_stack.c:50 +[< inline >] print_address_description mm/kasan/report.c:147 +[< inline >] kasan_report_error mm/kasan/report.c:236 +[] kasan_report+0x380/0x4b8 mm/kasan/report.c:259 +[< inline >] check_memory_region mm/kasan/kasan.c:264 +[] __asan_load8+0x20/0x70 mm/kasan/kasan.c:507 +[] memcpy_fromiovec+0x5c/0x114 lib/iovec.c:15 +[< inline >] memcpy_from_msg include/linux/skbuff.h:2667 +[] ping_common_sendmsg+0x50/0x108 net/ipv4/ping.c:674 +[] ping_v4_sendmsg+0xd8/0x698 net/ipv4/ping.c:714 +[] inet_sendmsg+0xe0/0x12c net/ipv4/af_inet.c:749 +[< inline >] __sock_sendmsg_nosec net/socket.c:624 +[< inline >] __sock_sendmsg net/socket.c:632 +[] sock_sendmsg+0x124/0x164 net/socket.c:643 +[< inline >] SYSC_sendto net/socket.c:1797 +[] SyS_sendto+0x178/0x1d8 net/socket.c:1761 + +CVE-2016-8399 + +Reported-by: Qidan He +Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") +Cc: stable@vger.kernel.org +Signed-off-by: Kees Cook +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ping.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -662,6 +662,10 @@ int ping_common_sendmsg(int family, stru + if (len > 0xFFFF) + return -EMSGSIZE; + ++ /* Must have at least a full ICMP header. */ ++ if (len < icmph_len) ++ return -EINVAL; ++ + /* + * Check the flags. 
+ */ diff --git a/queue-4.8/net-sched-pedit-make-sure-that-offset-is-valid.patch b/queue-4.8/net-sched-pedit-make-sure-that-offset-is-valid.patch new file mode 100644 index 00000000000..ecf992e1292 --- /dev/null +++ b/queue-4.8/net-sched-pedit-make-sure-that-offset-is-valid.patch @@ -0,0 +1,68 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Amir Vadai +Date: Mon, 28 Nov 2016 12:56:40 +0200 +Subject: net/sched: pedit: make sure that offset is valid + +From: Amir Vadai + + +[ Upstream commit 95c2027bfeda21a28eb245121e6a249f38d0788e ] + +Add a validation function to make sure offset is valid: +1. Not below skb head (could happen when offset is negative). +2. Validate both 'offset' and 'at'. + +Signed-off-by: Amir Vadai +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_pedit.c | 24 ++++++++++++++++++++---- + 1 file changed, 20 insertions(+), 4 deletions(-) + +--- a/net/sched/act_pedit.c ++++ b/net/sched/act_pedit.c +@@ -108,6 +108,17 @@ static void tcf_pedit_cleanup(struct tc_ + kfree(keys); + } + ++static bool offset_valid(struct sk_buff *skb, int offset) ++{ ++ if (offset > 0 && offset > skb->len) ++ return false; ++ ++ if (offset < 0 && -offset > skb_headroom(skb)) ++ return false; ++ ++ return true; ++} ++ + static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) + { +@@ -134,6 +145,11 @@ static int tcf_pedit(struct sk_buff *skb + if (tkey->offmask) { + char *d, _d; + ++ if (!offset_valid(skb, off + tkey->at)) { ++ pr_info("tc filter pedit 'at' offset %d out of bounds\n", ++ off + tkey->at); ++ goto bad; ++ } + d = skb_header_pointer(skb, off + tkey->at, 1, + &_d); + if (!d) +@@ -146,10 +162,10 @@ static int tcf_pedit(struct sk_buff *skb + " offset must be on 32 bit boundaries\n"); + goto bad; + } +- if (offset > 0 && offset > skb->len) { +- pr_info("tc filter pedit" +- " offset %d can't exceed pkt length %d\n", +- offset, skb->len); ++ ++ if (!offset_valid(skb, off + offset)) { ++ 
pr_info("tc filter pedit offset %d out of bounds\n", ++ offset); + goto bad; + } + diff --git a/queue-4.8/net-sched-respect-rcu-grace-period-on-cls-destruction.patch b/queue-4.8/net-sched-respect-rcu-grace-period-on-cls-destruction.patch new file mode 100644 index 00000000000..91cbfc364e0 --- /dev/null +++ b/queue-4.8/net-sched-respect-rcu-grace-period-on-cls-destruction.patch @@ -0,0 +1,257 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Daniel Borkmann +Date: Sun, 27 Nov 2016 01:18:01 +0100 +Subject: net, sched: respect rcu grace period on cls destruction + +From: Daniel Borkmann + + +[ Upstream commit d936377414fadbafb4d17148d222fe45ca5442d4 ] + +Roi reported a crash in flower where tp->root was NULL in ->classify() +callbacks. Reason is that in ->destroy() tp->root is set to NULL via +RCU_INIT_POINTER(). It's problematic for some of the classifiers, because +this doesn't respect RCU grace period for them, and as a result, still +outstanding readers from tc_classify() will try to blindly dereference +a NULL tp->root. + +The tp->root object is strictly private to the classifier implementation +and holds internal data the core such as tc_ctl_tfilter() doesn't know +about. Within some classifiers, such as cls_bpf, cls_basic, etc, tp->root +is only checked for NULL in ->get() callback, but nowhere else. This is +misleading and seemed to be copied from old classifier code that was not +cleaned up properly. For example, d3fa76ee6b4a ("[NET_SCHED]: cls_basic: +fix NULL pointer dereference") moved tp->root initialization into ->init() +routine, where before it was part of ->change(), so ->get() had to deal +with tp->root being NULL back then, so that was indeed a valid case, after +d3fa76ee6b4a, not really anymore. 
We used to set tp->root to NULL long +ago in ->destroy(), see 47a1a1d4be29 ("pkt_sched: remove unnecessary xchg() +in packet classifiers"); but the NULLifying was reintroduced with the +RCUification, but it's not correct for every classifier implementation. + +In the cases that are fixed here with one exception of cls_cgroup, tp->root +object is allocated and initialized inside ->init() callback, which is always +performed at a point in time after we allocate a new tp, which means tp and +thus tp->root was not globally visible in the tp chain yet (see tc_ctl_tfilter()). +Also, on destruction tp->root is strictly kfree_rcu()'ed in ->destroy() +handler, same for the tp which is kfree_rcu()'ed right when we return +from ->destroy() in tcf_destroy(). This means, the head object's lifetime +for such classifiers is always tied to the tp lifetime. The RCU callback +invocation for the two kfree_rcu() could be out of order, but that's fine +since both are independent. + +Dropping the RCU_INIT_POINTER(tp->root, NULL) for these classifiers here +means that 1) we don't need a useless NULL check in fast-path and, 2) that +outstanding readers of that tp in tc_classify() can still execute under +respect with RCU grace period as it is actually expected. + +Things that haven't been touched here: cls_fw and cls_route. They each +handle tp->root being NULL in ->classify() path for historic reasons, so +their ->destroy() implementation can stay as is. If someone actually +cares, they could get cleaned up at some point to avoid the test in fast +path. cls_u32 doesn't set tp->root to NULL. For cls_rsvp, I just added a +!head should anyone actually be using/testing it, so it at least aligns with +cls_fw and cls_route. For cls_flower we additionally need to defer rhashtable +destruction (to a sleepable context) after RCU grace period as concurrent +readers might still access it. 
(Note that in this case we need to hold module +reference to keep work callback address intact, since we only wait on module +unload for all call_rcu()s to finish.) + +This fixes one race to bring RCU grace period guarantees back. Next step +as worked on by Cong however is to fix 1e052be69d04 ("net_sched: destroy +proto tp when all filters are gone") to get the order of unlinking the tp +in tc_ctl_tfilter() for the RTM_DELTFILTER case right by moving +RCU_INIT_POINTER() before tcf_destroy() and let the notification for +removal be done through the prior ->delete() callback. Both are independent +issues. Once we have that right, we can then clean tp->root up for a number +of classifiers by not making them RCU pointers, which requires a new callback +(->uninit) that is triggered from tp's RCU callback, where we just kfree() +tp->root from there. + +Fixes: 1f947bf151e9 ("net: sched: rcu'ify cls_bpf") +Fixes: 9888faefe132 ("net: sched: cls_basic use RCU") +Fixes: 70da9f0bf999 ("net: sched: cls_flow use RCU") +Fixes: 77b9900ef53a ("tc: introduce Flower classifier") +Fixes: bf3994d2ed31 ("net/sched: introduce Match-all classifier") +Fixes: 952313bd6258 ("net: sched: cls_cgroup use RCU") +Reported-by: Roi Dayan +Signed-off-by: Daniel Borkmann +Cc: Cong Wang +Cc: John Fastabend +Cc: Roi Dayan +Cc: Jiri Pirko +Acked-by: John Fastabend +Acked-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_basic.c | 4 ---- + net/sched/cls_bpf.c | 4 ---- + net/sched/cls_cgroup.c | 7 +++---- + net/sched/cls_flow.c | 1 - + net/sched/cls_flower.c | 31 ++++++++++++++++++++++++++----- + net/sched/cls_matchall.c | 1 - + net/sched/cls_rsvp.h | 3 ++- + net/sched/cls_tcindex.c | 1 - + 8 files changed, 31 insertions(+), 21 deletions(-) + +--- a/net/sched/cls_basic.c ++++ b/net/sched/cls_basic.c +@@ -62,9 +62,6 @@ static unsigned long basic_get(struct tc + struct basic_head *head = rtnl_dereference(tp->root); + struct basic_filter *f; + +- if (head == NULL) +- return 0UL; +- + list_for_each_entry(f, &head->flist, link) { + if (f->handle == handle) { + l = (unsigned long) f; +@@ -109,7 +106,6 @@ static bool basic_destroy(struct tcf_pro + tcf_unbind_filter(tp, &f->res); + call_rcu(&f->rcu, basic_delete_filter); + } +- RCU_INIT_POINTER(tp->root, NULL); + kfree_rcu(head, rcu); + return true; + } +--- a/net/sched/cls_bpf.c ++++ b/net/sched/cls_bpf.c +@@ -200,7 +200,6 @@ static bool cls_bpf_destroy(struct tcf_p + call_rcu(&prog->rcu, __cls_bpf_delete_prog); + } + +- RCU_INIT_POINTER(tp->root, NULL); + kfree_rcu(head, rcu); + return true; + } +@@ -211,9 +210,6 @@ static unsigned long cls_bpf_get(struct + struct cls_bpf_prog *prog; + unsigned long ret = 0UL; + +- if (head == NULL) +- return 0UL; +- + list_for_each_entry(prog, &head->plist, link) { + if (prog->handle == handle) { + ret = (unsigned long) prog; +--- a/net/sched/cls_cgroup.c ++++ b/net/sched/cls_cgroup.c +@@ -130,11 +130,10 @@ static bool cls_cgroup_destroy(struct tc + + if (!force) + return false; +- +- if (head) { +- RCU_INIT_POINTER(tp->root, NULL); ++ /* Head can still be NULL due to cls_cgroup_init(). 
*/ ++ if (head) + call_rcu(&head->rcu, cls_cgroup_destroy_rcu); +- } ++ + return true; + } + +--- a/net/sched/cls_flow.c ++++ b/net/sched/cls_flow.c +@@ -583,7 +583,6 @@ static bool flow_destroy(struct tcf_prot + list_del_rcu(&f->list); + call_rcu(&f->rcu, flow_destroy_filter); + } +- RCU_INIT_POINTER(tp->root, NULL); + kfree_rcu(head, rcu); + return true; + } +--- a/net/sched/cls_flower.c ++++ b/net/sched/cls_flower.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -55,7 +56,10 @@ struct cls_fl_head { + bool mask_assigned; + struct list_head filters; + struct rhashtable_params ht_params; +- struct rcu_head rcu; ++ union { ++ struct work_struct work; ++ struct rcu_head rcu; ++ }; + }; + + struct cls_fl_filter { +@@ -239,6 +243,24 @@ static void fl_hw_update_stats(struct tc + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); + } + ++static void fl_destroy_sleepable(struct work_struct *work) ++{ ++ struct cls_fl_head *head = container_of(work, struct cls_fl_head, ++ work); ++ if (head->mask_assigned) ++ rhashtable_destroy(&head->ht); ++ kfree(head); ++ module_put(THIS_MODULE); ++} ++ ++static void fl_destroy_rcu(struct rcu_head *rcu) ++{ ++ struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu); ++ ++ INIT_WORK(&head->work, fl_destroy_sleepable); ++ schedule_work(&head->work); ++} ++ + static bool fl_destroy(struct tcf_proto *tp, bool force) + { + struct cls_fl_head *head = rtnl_dereference(tp->root); +@@ -252,10 +274,9 @@ static bool fl_destroy(struct tcf_proto + list_del_rcu(&f->list); + call_rcu(&f->rcu, fl_destroy_filter); + } +- RCU_INIT_POINTER(tp->root, NULL); +- if (head->mask_assigned) +- rhashtable_destroy(&head->ht); +- kfree_rcu(head, rcu); ++ ++ __module_get(THIS_MODULE); ++ call_rcu(&head->rcu, fl_destroy_rcu); + return true; + } + +--- a/net/sched/cls_matchall.c ++++ b/net/sched/cls_matchall.c +@@ -114,7 +114,6 @@ static bool mall_destroy(struct tcf_prot + + call_rcu(&f->rcu, 
mall_destroy_filter); + } +- RCU_INIT_POINTER(tp->root, NULL); + kfree_rcu(head, rcu); + return true; + } +--- a/net/sched/cls_rsvp.h ++++ b/net/sched/cls_rsvp.h +@@ -152,7 +152,8 @@ static int rsvp_classify(struct sk_buff + return -1; + nhptr = ip_hdr(skb); + #endif +- ++ if (unlikely(!head)) ++ return -1; + restart: + + #if RSVP_DST_LEN == 4 +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -503,7 +503,6 @@ static bool tcindex_destroy(struct tcf_p + walker.fn = tcindex_destroy_element; + tcindex_walk(tp, &walker); + +- RCU_INIT_POINTER(tp->root, NULL); + call_rcu(&p->rcu, __tcindex_destroy); + return true; + } diff --git a/queue-4.8/net-sky2-fix-shutdown-crash.patch b/queue-4.8/net-sky2-fix-shutdown-crash.patch new file mode 100644 index 00000000000..a205f4402c6 --- /dev/null +++ b/queue-4.8/net-sky2-fix-shutdown-crash.patch @@ -0,0 +1,67 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Jeremy Linton +Date: Thu, 17 Nov 2016 09:14:25 -0600 +Subject: net: sky2: Fix shutdown crash + +From: Jeremy Linton + + +[ Upstream commit 06ba3b2133dc203e1e9bc36cee7f0839b79a9e8b ] + +The sky2 frequently crashes during machine shutdown with: + +sky2_get_stats+0x60/0x3d8 [sky2] +dev_get_stats+0x68/0xd8 +rtnl_fill_stats+0x54/0x140 +rtnl_fill_ifinfo+0x46c/0xc68 +rtmsg_ifinfo_build_skb+0x7c/0xf0 +rtmsg_ifinfo.part.22+0x3c/0x70 +rtmsg_ifinfo+0x50/0x5c +netdev_state_change+0x4c/0x58 +linkwatch_do_dev+0x50/0x88 +__linkwatch_run_queue+0x104/0x1a4 +linkwatch_event+0x30/0x3c +process_one_work+0x140/0x3e0 +worker_thread+0x60/0x44c +kthread+0xdc/0xf0 +ret_from_fork+0x10/0x50 + +This is caused by the sky2 being called after it has been shutdown. +A previous thread about this can be found here: + +https://lkml.org/lkml/2016/4/12/410 + +An alternative fix is to assure that IFF_UP gets cleared by +calling dev_close() during shutdown. This is similar to what the +bnx2/tg3/xgene and maybe others are doing to assure that the driver +isn't being called following _shutdown(). 
+ +Signed-off-by: Jeremy Linton +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/sky2.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/drivers/net/ethernet/marvell/sky2.c ++++ b/drivers/net/ethernet/marvell/sky2.c +@@ -5220,6 +5220,19 @@ static SIMPLE_DEV_PM_OPS(sky2_pm_ops, sk + + static void sky2_shutdown(struct pci_dev *pdev) + { ++ struct sky2_hw *hw = pci_get_drvdata(pdev); ++ int port; ++ ++ for (port = 0; port < hw->ports; port++) { ++ struct net_device *ndev = hw->dev[port]; ++ ++ rtnl_lock(); ++ if (netif_running(ndev)) { ++ dev_close(ndev); ++ netif_device_detach(ndev); ++ } ++ rtnl_unlock(); ++ } + sky2_suspend(&pdev->dev); + pci_wake_from_d3(pdev, device_may_wakeup(&pdev->dev)); + pci_set_power_state(pdev, PCI_D3hot); diff --git a/queue-4.8/netlink-call-cb-done-from-a-worker-thread.patch b/queue-4.8/netlink-call-cb-done-from-a-worker-thread.patch new file mode 100644 index 00000000000..0dcfad06113 --- /dev/null +++ b/queue-4.8/netlink-call-cb-done-from-a-worker-thread.patch @@ -0,0 +1,91 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Herbert Xu +Date: Mon, 28 Nov 2016 19:22:12 +0800 +Subject: netlink: Call cb->done from a worker thread + +From: Herbert Xu + + +[ Upstream commit 707693c8a498697aa8db240b93eb76ec62e30892 ] + +The cb->done interface expects to be called in process context. +This was broken by the netlink RCU conversion. This patch fixes +it by adding a worker struct to make the cb->done call where +necessary. + +Fixes: 21e4902aea80 ("netlink: Lockless lookup with RCU grace...") +Reported-by: Subash Abhinov Kasiviswanathan +Signed-off-by: Herbert Xu +Acked-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 27 +++++++++++++++++++++++---- + net/netlink/af_netlink.h | 2 ++ + 2 files changed, 25 insertions(+), 4 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -322,14 +322,11 @@ static void netlink_skb_set_owner_r(stru + sk_mem_charge(sk, skb->truesize); + } + +-static void netlink_sock_destruct(struct sock *sk) ++static void __netlink_sock_destruct(struct sock *sk) + { + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->cb_running) { +- if (nlk->cb.done) +- nlk->cb.done(&nlk->cb); +- + module_put(nlk->cb.module); + kfree_skb(nlk->cb.skb); + } +@@ -346,6 +343,28 @@ static void netlink_sock_destruct(struct + WARN_ON(nlk_sk(sk)->groups); + } + ++static void netlink_sock_destruct_work(struct work_struct *work) ++{ ++ struct netlink_sock *nlk = container_of(work, struct netlink_sock, ++ work); ++ ++ nlk->cb.done(&nlk->cb); ++ __netlink_sock_destruct(&nlk->sk); ++} ++ ++static void netlink_sock_destruct(struct sock *sk) ++{ ++ struct netlink_sock *nlk = nlk_sk(sk); ++ ++ if (nlk->cb_running && nlk->cb.done) { ++ INIT_WORK(&nlk->work, netlink_sock_destruct_work); ++ schedule_work(&nlk->work); ++ return; ++ } ++ ++ __netlink_sock_destruct(sk); ++} ++ + /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on + * SMP. Look, when several writers sleep and reader wakes them up, all but one + * immediately hit write lock and grab all the cpus. 
Exclusive sleep solves +--- a/net/netlink/af_netlink.h ++++ b/net/netlink/af_netlink.h +@@ -3,6 +3,7 @@ + + #include + #include ++#include + #include + + #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) +@@ -33,6 +34,7 @@ struct netlink_sock { + + struct rhash_head node; + struct rcu_head rcu; ++ struct work_struct work; + }; + + static inline struct netlink_sock *nlk_sk(struct sock *sk) diff --git a/queue-4.8/netlink-do-not-schedule-work-from-sk_destruct.patch b/queue-4.8/netlink-do-not-schedule-work-from-sk_destruct.patch new file mode 100644 index 00000000000..6889c04c1b3 --- /dev/null +++ b/queue-4.8/netlink-do-not-schedule-work-from-sk_destruct.patch @@ -0,0 +1,87 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Herbert Xu +Date: Mon, 5 Dec 2016 15:28:21 +0800 +Subject: netlink: Do not schedule work from sk_destruct + +From: Herbert Xu + + +[ Upstream commit ed5d7788a934a4b6d6d025e948ed4da496b4f12e ] + +It is wrong to schedule a work from sk_destruct using the socket +as the memory reserve because the socket will be freed immediately +after the return from sk_destruct. + +Instead we should do the deferral prior to sk_free. + +This patch does just that. + +Fixes: 707693c8a498 ("netlink: Call cb->done from a worker thread") +Signed-off-by: Herbert Xu +Tested-by: Andrey Konovalov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 32 +++++++++++++++----------------- + 1 file changed, 15 insertions(+), 17 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -322,11 +322,13 @@ static void netlink_skb_set_owner_r(stru + sk_mem_charge(sk, skb->truesize); + } + +-static void __netlink_sock_destruct(struct sock *sk) ++static void netlink_sock_destruct(struct sock *sk) + { + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->cb_running) { ++ if (nlk->cb.done) ++ nlk->cb.done(&nlk->cb); + module_put(nlk->cb.module); + kfree_skb(nlk->cb.skb); + } +@@ -348,21 +350,7 @@ static void netlink_sock_destruct_work(s + struct netlink_sock *nlk = container_of(work, struct netlink_sock, + work); + +- nlk->cb.done(&nlk->cb); +- __netlink_sock_destruct(&nlk->sk); +-} +- +-static void netlink_sock_destruct(struct sock *sk) +-{ +- struct netlink_sock *nlk = nlk_sk(sk); +- +- if (nlk->cb_running && nlk->cb.done) { +- INIT_WORK(&nlk->work, netlink_sock_destruct_work); +- schedule_work(&nlk->work); +- return; +- } +- +- __netlink_sock_destruct(sk); ++ sk_free(&nlk->sk); + } + + /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on +@@ -667,8 +655,18 @@ out_module: + static void deferred_put_nlk_sk(struct rcu_head *head) + { + struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); ++ struct sock *sk = &nlk->sk; ++ ++ if (!atomic_dec_and_test(&sk->sk_refcnt)) ++ return; ++ ++ if (nlk->cb_running && nlk->cb.done) { ++ INIT_WORK(&nlk->work, netlink_sock_destruct_work); ++ schedule_work(&nlk->work); ++ return; ++ } + +- sock_put(&nlk->sk); ++ sk_free(sk); + } + + static int netlink_release(struct socket *sock) diff --git a/queue-4.8/packet-fix-race-condition-in-packet_set_ring.patch b/queue-4.8/packet-fix-race-condition-in-packet_set_ring.patch new file mode 100644 index 00000000000..b5e27007ac4 --- /dev/null +++ b/queue-4.8/packet-fix-race-condition-in-packet_set_ring.patch @@ 
-0,0 +1,93 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Philip Pettersson +Date: Wed, 30 Nov 2016 14:55:36 -0800 +Subject: packet: fix race condition in packet_set_ring + +From: Philip Pettersson + + +[ Upstream commit 84ac7260236a49c79eede91617700174c2c19b0c ] + +When packet_set_ring creates a ring buffer it will initialize a +struct timer_list if the packet version is TPACKET_V3. This value +can then be raced by a different thread calling setsockopt to +set the version to TPACKET_V1 before packet_set_ring has finished. + +This leads to a use-after-free on a function pointer in the +struct timer_list when the socket is closed as the previously +initialized timer will not be deleted. + +The bug is fixed by taking lock_sock(sk) in packet_setsockopt when +changing the packet version while also taking the lock at the start +of packet_set_ring. + +Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") +Signed-off-by: Philip Pettersson +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -3648,19 +3648,25 @@ packet_setsockopt(struct socket *sock, i + + if (optlen != sizeof(val)) + return -EINVAL; +- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) +- return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + switch (val) { + case TPACKET_V1: + case TPACKET_V2: + case TPACKET_V3: +- po->tp_version = val; +- return 0; ++ break; + default: + return -EINVAL; + } ++ lock_sock(sk); ++ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ++ ret = -EBUSY; ++ } else { ++ po->tp_version = val; ++ ret = 0; ++ } ++ release_sock(sk); ++ return ret; + } + case PACKET_RESERVE: + { +@@ -4164,6 +4170,7 @@ static int packet_set_ring(struct sock * + /* Added to avoid minimal code churn */ + struct tpacket_req *req = &req_u->req; + ++ lock_sock(sk); + /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ + if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { + net_warn_ratelimited("Tx-ring is not supported.\n"); +@@ -4245,7 +4252,6 @@ static int packet_set_ring(struct sock * + goto out; + } + +- lock_sock(sk); + + /* Detach socket from network */ + spin_lock(&po->bind_lock); +@@ -4294,11 +4300,11 @@ static int packet_set_ring(struct sock * + if (!tx_ring) + prb_shutdown_retire_blk_timer(po, rb_queue); + } +- release_sock(sk); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->tp_block_nr); + out: ++ release_sock(sk); + return err; + } + diff --git a/queue-4.8/rtnetlink-fix-fdb-size-computation.patch b/queue-4.8/rtnetlink-fix-fdb-size-computation.patch new file mode 100644 index 00000000000..c8b0ff9b2e0 --- /dev/null +++ b/queue-4.8/rtnetlink-fix-fdb-size-computation.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Sabrina Dubroca +Date: Fri, 18 Nov 2016 15:50:39 +0100 +Subject: rtnetlink: fix FDB size computation 
+ +From: Sabrina Dubroca + + +[ Upstream commit f82ef3e10a870acc19fa04f80ef5877eaa26f41e ] + +Add missing NDA_VLAN attribute's size. + +Fixes: 1e53d5bb8878 ("net: Pass VLAN ID to rtnl_fdb_notify.") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -2791,7 +2791,10 @@ nla_put_failure: + + static inline size_t rtnl_fdb_nlmsg_size(void) + { +- return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); ++ return NLMSG_ALIGN(sizeof(struct ndmsg)) + ++ nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ ++ nla_total_size(sizeof(u16)) + /* NDA_VLAN */ ++ 0; + } + + static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, diff --git a/queue-4.8/rtnl-fix-the-loop-index-update-error-in-rtnl_dump_ifinfo.patch b/queue-4.8/rtnl-fix-the-loop-index-update-error-in-rtnl_dump_ifinfo.patch new file mode 100644 index 00000000000..73ef1cc1ecf --- /dev/null +++ b/queue-4.8/rtnl-fix-the-loop-index-update-error-in-rtnl_dump_ifinfo.patch @@ -0,0 +1,33 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Zhang Shengju +Date: Sat, 19 Nov 2016 23:28:32 +0800 +Subject: rtnl: fix the loop index update error in rtnl_dump_ifinfo() + +From: Zhang Shengju + + +[ Upstream commit 3f0ae05d6fea0ed5b19efdbc9c9f8e02685a3af3 ] + +If the link is filtered out, loop index should also be updated. If not, +loop index will not be correct. + +Fixes: dc599f76c22b0 ("net: Add support for filtering link dump by master device and kind") +Signed-off-by: Zhang Shengju +Acked-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1578,7 +1578,7 @@ static int rtnl_dump_ifinfo(struct sk_bu + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, head, index_hlist) { + if (link_dump_filtered(dev, master_idx, kind_ops)) +- continue; ++ goto cont; + if (idx < s_idx) + goto cont; + err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, diff --git a/queue-4.8/sh_eth-remove-unchecked-interrupts-for-rz-a1.patch b/queue-4.8/sh_eth-remove-unchecked-interrupts-for-rz-a1.patch new file mode 100644 index 00000000000..b55ff059297 --- /dev/null +++ b/queue-4.8/sh_eth-remove-unchecked-interrupts-for-rz-a1.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Chris Brandt +Date: Thu, 1 Dec 2016 13:32:14 -0500 +Subject: sh_eth: remove unchecked interrupts for RZ/A1 + +From: Chris Brandt + + +[ Upstream commit 33d446dbba4d4d6a77e1e900d434fa99e0f02c86 ] + +When streaming a lot of data and the RZ/A1 can't keep up, some status bits +will get set that are not being checked or cleared which cause the +following messages and the Ethernet driver to stop working. This +patch fixes that issue. + +irq 21: nobody cared (try booting with the "irqpoll" option) +handlers: +[] sh_eth_interrupt +Disabling IRQ #21 + +Fixes: db893473d313a4ad ("sh_eth: Add support for r7s72100") +Signed-off-by: Chris Brandt +Acked-by: Sergei Shtylyov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/renesas/sh_eth.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/renesas/sh_eth.c ++++ b/drivers/net/ethernet/renesas/sh_eth.c +@@ -518,7 +518,7 @@ static struct sh_eth_cpu_data r7s72100_d + + .ecsr_value = ECSR_ICD, + .ecsipr_value = ECSIPR_ICDIP, +- .eesipr_value = 0xff7f009f, ++ .eesipr_value = 0xe77f009f, + + .tx_check = EESR_TC1 | EESR_FTC, + .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | diff --git a/queue-4.8/sparc32-fix-inverted-invalid_frame_pointer-checks-on-sigreturns.patch b/queue-4.8/sparc32-fix-inverted-invalid_frame_pointer-checks-on-sigreturns.patch new file mode 100644 index 00000000000..d3de83ee9d6 --- /dev/null +++ b/queue-4.8/sparc32-fix-inverted-invalid_frame_pointer-checks-on-sigreturns.patch @@ -0,0 +1,37 @@ +From foo@baz Thu Dec 8 07:19:56 CET 2016 +From: Andreas Larsson +Date: Wed, 9 Nov 2016 10:43:05 +0100 +Subject: sparc32: Fix inverted invalid_frame_pointer checks on sigreturns + +From: Andreas Larsson + + +[ Upstream commit 07b5ab3f71d318e52c18cc3b73c1d44c908aacfa ] + +Signed-off-by: Andreas Larsson +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/signal_32.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/sparc/kernel/signal_32.c ++++ b/arch/sparc/kernel/signal_32.c +@@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_r + sf = (struct signal_frame __user *) regs->u_regs[UREG_FP]; + + /* 1. 
Make sure we are not getting garbage from the user */ +- if (!invalid_frame_pointer(sf, sizeof(*sf))) ++ if (invalid_frame_pointer(sf, sizeof(*sf))) + goto segv_and_exit; + + if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP])) +@@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct p + + synchronize_user_stack(); + sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP]; +- if (!invalid_frame_pointer(sf, sizeof(*sf))) ++ if (invalid_frame_pointer(sf, sizeof(*sf))) + goto segv; + + if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) diff --git a/queue-4.8/sparc64-fix-compile-warning-section-mismatch-in-find_node.patch b/queue-4.8/sparc64-fix-compile-warning-section-mismatch-in-find_node.patch new file mode 100644 index 00000000000..43c62504590 --- /dev/null +++ b/queue-4.8/sparc64-fix-compile-warning-section-mismatch-in-find_node.patch @@ -0,0 +1,52 @@ +From foo@baz Thu Dec 8 07:19:56 CET 2016 +From: Thomas Tai +Date: Fri, 11 Nov 2016 16:41:00 -0800 +Subject: sparc64: fix compile warning section mismatch in find_node() + +From: Thomas Tai + + +[ Upstream commit 87a349f9cc0908bc0cfac0c9ece3179f650ae95a ] + +A compile warning is introduced by a commit to fix the find_node(). +This patch fix the compile warning by moving find_node() into __init +section. Because find_node() is only used by memblock_nid_range() which +is only used by a __init add_node_ranges(). find_node() and +memblock_nid_range() should also be inside __init section. + +Signed-off-by: Thomas Tai +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/init_64.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -805,7 +805,7 @@ static int num_mblocks; + static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask); + +-static unsigned long ra_to_pa(unsigned long addr) ++static unsigned long __init ra_to_pa(unsigned long addr) + { + int i; + +@@ -821,7 +821,7 @@ static unsigned long ra_to_pa(unsigned l + return addr; + } + +-static int find_node(unsigned long addr) ++static int __init find_node(unsigned long addr) + { + static bool search_mdesc = true; + static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; +@@ -858,7 +858,7 @@ static int find_node(unsigned long addr) + return last_index; + } + +-static u64 memblock_nid_range(u64 start, u64 end, int *nid) ++static u64 __init memblock_nid_range(u64 start, u64 end, int *nid) + { + *nid = find_node(start); + start += PAGE_SIZE; diff --git a/queue-4.8/sparc64-fix-find_node-warning-if-numa-node-cannot-be-found.patch b/queue-4.8/sparc64-fix-find_node-warning-if-numa-node-cannot-be-found.patch new file mode 100644 index 00000000000..405e591be7a --- /dev/null +++ b/queue-4.8/sparc64-fix-find_node-warning-if-numa-node-cannot-be-found.patch @@ -0,0 +1,148 @@ +From foo@baz Thu Dec 8 07:19:56 CET 2016 +From: Thomas Tai +Date: Thu, 3 Nov 2016 09:19:01 -0700 +Subject: sparc64: Fix find_node warning if numa node cannot be found + +From: Thomas Tai + + +[ Upstream commit 74a5ed5c4f692df2ff0a2313ea71e81243525519 ] + +When booting up LDOM, find_node() warns that a physical address +doesn't match a NUMA node. + +WARNING: CPU: 0 PID: 0 at arch/sparc/mm/init_64.c:835 +find_node+0xf4/0x120 find_node: A physical address doesn't +match a NUMA node rule. 
Some physical memory will be +owned by node 0.Modules linked in: + +CPU: 0 PID: 0 Comm: swapper Not tainted 4.9.0-rc3 #4 +Call Trace: + [0000000000468ba0] __warn+0xc0/0xe0 + [0000000000468c74] warn_slowpath_fmt+0x34/0x60 + [00000000004592f4] find_node+0xf4/0x120 + [0000000000dd0774] add_node_ranges+0x38/0xe4 + [0000000000dd0b1c] numa_parse_mdesc+0x268/0x2e4 + [0000000000dd0e9c] bootmem_init+0xb8/0x160 + [0000000000dd174c] paging_init+0x808/0x8fc + [0000000000dcb0d0] setup_arch+0x2c8/0x2f0 + [0000000000dc68a0] start_kernel+0x48/0x424 + [0000000000dcb374] start_early_boot+0x27c/0x28c + [0000000000a32c08] tlb_fixup_done+0x4c/0x64 + [0000000000027f08] 0x27f08 + +It is because linux use an internal structure node_masks[] to +keep the best memory latency node only. However, LDOM mdesc can +contain single latency-group with multiple memory latency nodes. + +If the address doesn't match the best latency node within +node_masks[], it should check for an alternative via mdesc. +The warning message should only be printed if the address +doesn't match any node_masks[] nor within mdesc. To minimize +the impact of searching mdesc every time, the last matched +mask and index is stored in a variable. + +Signed-off-by: Thomas Tai +Reviewed-by: Chris Hyser +Reviewed-by: Liam Merwick +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/init_64.c | 65 +++++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 61 insertions(+), 4 deletions(-) + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -802,6 +802,8 @@ struct mdesc_mblock { + }; + static struct mdesc_mblock *mblocks; + static int num_mblocks; ++static int find_numa_node_for_addr(unsigned long pa, ++ struct node_mem_mask *pnode_mask); + + static unsigned long ra_to_pa(unsigned long addr) + { +@@ -821,6 +823,9 @@ static unsigned long ra_to_pa(unsigned l + + static int find_node(unsigned long addr) + { ++ static bool search_mdesc = true; ++ static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; ++ static int last_index; + int i; + + addr = ra_to_pa(addr); +@@ -830,10 +835,27 @@ static int find_node(unsigned long addr) + if ((addr & p->mask) == p->val) + return i; + } +- /* The following condition has been observed on LDOM guests.*/ +- WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" +- " rule. Some physical memory will be owned by node 0."); +- return 0; ++ /* The following condition has been observed on LDOM guests because ++ * node_masks only contains the best latency mask and value. ++ * LDOM guest's mdesc can contain a single latency group to ++ * cover multiple address range. Print warning message only if the ++ * address cannot be found in node_masks nor mdesc. ++ */ ++ if ((search_mdesc) && ++ ((addr & last_mem_mask.mask) != last_mem_mask.val)) { ++ /* find the available node in the mdesc */ ++ last_index = find_numa_node_for_addr(addr, &last_mem_mask); ++ numadbg("find_node: latency group for address 0x%lx is %d\n", ++ addr, last_index); ++ if ((last_index < 0) || (last_index >= num_node_masks)) { ++ /* WARN_ONCE() and use default group 0 */ ++ WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. 
Some physical memory will be owned by node 0."); ++ search_mdesc = false; ++ last_index = 0; ++ } ++ } ++ ++ return last_index; + } + + static u64 memblock_nid_range(u64 start, u64 end, int *nid) +@@ -1160,6 +1182,41 @@ int __node_distance(int from, int to) + return numa_latency[from][to]; + } + ++static int find_numa_node_for_addr(unsigned long pa, ++ struct node_mem_mask *pnode_mask) ++{ ++ struct mdesc_handle *md = mdesc_grab(); ++ u64 node, arc; ++ int i = 0; ++ ++ node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); ++ if (node == MDESC_NODE_NULL) ++ goto out; ++ ++ mdesc_for_each_node_by_name(md, node, "group") { ++ mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) { ++ u64 target = mdesc_arc_target(md, arc); ++ struct mdesc_mlgroup *m = find_mlgroup(target); ++ ++ if (!m) ++ continue; ++ if ((pa & m->mask) == m->match) { ++ if (pnode_mask) { ++ pnode_mask->mask = m->mask; ++ pnode_mask->val = m->match; ++ } ++ mdesc_release(md); ++ return i; ++ } ++ } ++ i++; ++ } ++ ++out: ++ mdesc_release(md); ++ return -1; ++} ++ + static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) + { + int i; diff --git a/queue-4.8/tipc-check-minimum-bearer-mtu.patch b/queue-4.8/tipc-check-minimum-bearer-mtu.patch new file mode 100644 index 00000000000..115fae1d6a7 --- /dev/null +++ b/queue-4.8/tipc-check-minimum-bearer-mtu.patch @@ -0,0 +1,116 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Michal Kubeček +Date: Fri, 2 Dec 2016 09:33:41 +0100 +Subject: tipc: check minimum bearer MTU + +From: Michal Kubeček + + +[ Upstream commit 3de81b758853f0b29c61e246679d20b513c4cfec ] + +Qian Zhang (张谦) reported a potential socket buffer overflow in +tipc_msg_build() which is also known as CVE-2016-8632: due to +insufficient checks, a buffer overflow can occur if MTU is too short for +even tipc headers. As anyone can set device MTU in a user/net namespace, +this issue can be abused by a regular user. 
+ +As agreed in the discussion on Ben Hutchings' original patch, we should +check the MTU at the moment a bearer is attached rather than for each +processed packet. We also need to repeat the check when bearer MTU is +adjusted to new device MTU. UDP case also needs a check to avoid +overflow when calculating bearer MTU. + +Fixes: b97bf3fd8f6a ("[TIPC] Initial merge") +Signed-off-by: Michal Kubecek +Reported-by: Qian Zhang (张谦) +Acked-by: Ying Xue +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/bearer.c | 11 +++++++++-- + net/tipc/bearer.h | 13 +++++++++++++ + net/tipc/udp_media.c | 5 +++++ + 3 files changed, 27 insertions(+), 2 deletions(-) + +--- a/net/tipc/bearer.c ++++ b/net/tipc/bearer.c +@@ -402,6 +402,10 @@ int tipc_enable_l2_media(struct net *net + dev = dev_get_by_name(net, driver_name); + if (!dev) + return -ENODEV; ++ if (tipc_mtu_bad(dev, 0)) { ++ dev_put(dev); ++ return -EINVAL; ++ } + + /* Associate TIPC bearer with L2 bearer */ + rcu_assign_pointer(b->media_ptr, dev); +@@ -606,8 +610,6 @@ static int tipc_l2_device_event(struct n + if (!b) + return NOTIFY_DONE; + +- b->mtu = dev->mtu; +- + switch (evt) { + case NETDEV_CHANGE: + if (netif_carrier_ok(dev)) +@@ -621,6 +623,11 @@ static int tipc_l2_device_event(struct n + tipc_reset_bearer(net, b); + break; + case NETDEV_CHANGEMTU: ++ if (tipc_mtu_bad(dev, 0)) { ++ bearer_disable(net, b); ++ break; ++ } ++ b->mtu = dev->mtu; + tipc_reset_bearer(net, b); + break; + case NETDEV_CHANGEADDR: +--- a/net/tipc/bearer.h ++++ b/net/tipc/bearer.h +@@ -39,6 +39,7 @@ + + #include "netlink.h" + #include "core.h" ++#include "msg.h" + #include + + #define MAX_MEDIA 3 +@@ -59,6 +60,9 @@ + #define TIPC_MEDIA_TYPE_IB 2 + #define TIPC_MEDIA_TYPE_UDP 3 + ++/* minimum bearer MTU */ ++#define TIPC_MIN_BEARER_MTU (MAX_H_SIZE + INT_H_SIZE) ++ + /** + * struct tipc_media_addr - destination address used by TIPC bearers + * @value: address info (format defined by media) +@@ -213,4 +217,13 @@ void 
tipc_bearer_xmit(struct net *net, u + void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq); + ++/* check if device MTU is too low for tipc headers */ ++static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve) ++{ ++ if (dev->mtu >= TIPC_MIN_BEARER_MTU + reserve) ++ return false; ++ netdev_warn(dev, "MTU too low for tipc bearer\n"); ++ return true; ++} ++ + #endif /* _TIPC_BEARER_H */ +--- a/net/tipc/udp_media.c ++++ b/net/tipc/udp_media.c +@@ -372,6 +372,11 @@ static int tipc_udp_enable(struct net *n + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.use_udp_checksums = false; + ub->ifindex = dev->ifindex; ++ if (tipc_mtu_bad(dev, sizeof(struct iphdr) + ++ sizeof(struct udphdr))) { ++ err = -EINVAL; ++ goto err; ++ } + b->mtu = dev->mtu - sizeof(struct iphdr) + - sizeof(struct udphdr); + #if IS_ENABLED(CONFIG_IPV6) diff --git a/queue-4.8/udplite-call-proper-backlog-handlers.patch b/queue-4.8/udplite-call-proper-backlog-handlers.patch new file mode 100644 index 00000000000..9f0d9cb8925 --- /dev/null +++ b/queue-4.8/udplite-call-proper-backlog-handlers.patch @@ -0,0 +1,108 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Eric Dumazet +Date: Tue, 22 Nov 2016 09:06:45 -0800 +Subject: udplite: call proper backlog handlers + +From: Eric Dumazet + + +[ Upstream commit 30c7be26fd3587abcb69587f781098e3ca2d565b ] + +In commits 93821778def10 ("udp: Fix rcv socket locking") and +f7ad74fef3af ("net/ipv6/udp: UDP encapsulation: break backlog_rcv into +__udpv6_queue_rcv_skb") UDP backlog handlers were renamed, but UDPlite +was forgotten. + +This leads to crashes if UDPlite header is pulled twice, which happens +starting from commit e6afc8ace6dd ("udp: remove headers from UDP packets +before queueing") + +Bug found by syzkaller team, thanks a lot guys ! 
+ +Note that backlog use in UDP/UDPlite is scheduled to be removed starting +from linux-4.10, so this patch is only needed up to linux-4.9 + +Fixes: 93821778def1 ("udp: Fix rcv socket locking") +Fixes: f7ad74fef3af ("net/ipv6/udp: UDP encapsulation: break backlog_rcv into __udpv6_queue_rcv_skb") +Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Cc: Benjamin LaHaise +Cc: Herbert Xu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 2 +- + net/ipv4/udp_impl.h | 2 +- + net/ipv4/udplite.c | 2 +- + net/ipv6/udp.c | 2 +- + net/ipv6/udp_impl.h | 2 +- + net/ipv6/udplite.c | 2 +- + 6 files changed, 6 insertions(+), 6 deletions(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1451,7 +1451,7 @@ static void udp_v4_rehash(struct sock *s + udp_lib_rehash(sk, new_hash); + } + +-static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ++int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) + { + int rc; + +--- a/net/ipv4/udp_impl.h ++++ b/net/ipv4/udp_impl.h +@@ -25,7 +25,7 @@ int udp_recvmsg(struct sock *sk, struct + int flags, int *addr_len); + int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, + int flags); +-int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); ++int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + void udp_destroy_sock(struct sock *sk); + + #ifdef CONFIG_PROC_FS +--- a/net/ipv4/udplite.c ++++ b/net/ipv4/udplite.c +@@ -50,7 +50,7 @@ struct proto udplite_prot = { + .sendmsg = udp_sendmsg, + .recvmsg = udp_recvmsg, + .sendpage = udp_sendpage, +- .backlog_rcv = udp_queue_rcv_skb, ++ .backlog_rcv = __udp_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udp_v4_get_port, +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -514,7 +514,7 @@ out: + return; + } + +-static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ++int 
__udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) + { + int rc; + +--- a/net/ipv6/udp_impl.h ++++ b/net/ipv6/udp_impl.h +@@ -26,7 +26,7 @@ int compat_udpv6_getsockopt(struct sock + int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); + int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); +-int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); ++int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + void udpv6_destroy_sock(struct sock *sk); + + void udp_v6_clear_sk(struct sock *sk, int size); +--- a/net/ipv6/udplite.c ++++ b/net/ipv6/udplite.c +@@ -45,7 +45,7 @@ struct proto udplitev6_prot = { + .getsockopt = udpv6_getsockopt, + .sendmsg = udpv6_sendmsg, + .recvmsg = udpv6_recvmsg, +- .backlog_rcv = udpv6_queue_rcv_skb, ++ .backlog_rcv = __udpv6_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udp_v6_get_port, diff --git a/queue-4.8/virtio-net-add-a-missing-synchronize_net.patch b/queue-4.8/virtio-net-add-a-missing-synchronize_net.patch new file mode 100644 index 00000000000..c2a03cb151b --- /dev/null +++ b/queue-4.8/virtio-net-add-a-missing-synchronize_net.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Dec 8 07:19:12 CET 2016 +From: Eric Dumazet +Date: Tue, 15 Nov 2016 22:24:12 -0800 +Subject: virtio-net: add a missing synchronize_net() + +From: Eric Dumazet + + +[ Upstream commit 963abe5c8a0273a1cf5913556da1b1189de0e57a ] + +It seems many drivers do not respect napi_hash_del() contract. + +When napi_hash_del() is used before netif_napi_del(), an RCU grace +period is needed before freeing NAPI object. + +Fixes: 91815639d880 ("virtio-net: rx busy polling support") +Signed-off-by: Eric Dumazet +Cc: Jason Wang +Cc: Michael S. Tsirkin +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -1468,6 +1468,11 @@ static void virtnet_free_queues(struct v + netif_napi_del(&vi->rq[i].napi); + } + ++ /* We called napi_hash_del() before netif_napi_del(), ++ * we need to respect an RCU grace period before freeing vi->rq ++ */ ++ synchronize_net(); ++ + kfree(vi->rq); + kfree(vi->sq); + }