From: Greg Kroah-Hartman Date: Thu, 8 Jun 2017 06:59:09 +0000 (+0200) Subject: 4.11-stable patches X-Git-Tag: v3.18.57~41 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3e8ec92d0a3b3baf2496225dadb5e8ff31214918;p=thirdparty%2Fkernel%2Fstable-queue.git 4.11-stable patches added patches: arch-sparc-support-nr_cpus-4096.patch bnx2x-fix-multi-cos.patch cxgb4-avoid-enabling-napi-twice-to-the-same-queue.patch geneve-fix-needed_headroom-and-max_mtu-for-collect_metadata.patch ip6_tunnel-fix-traffic-class-routing-for-tunnels.patch ipv6-fix-leak-in-ipv6_gso_segment.patch ipv6-xfrm-handle-errors-reported-by-xfrm6_find_1stfragopt.patch net-bridge-fix-a-null-pointer-dereference-in-br_afspec.patch net-bridge-start-hello-timer-only-if-device-is-up.patch net-dsa-fix-stale-cpu_switch-reference-after-unbind-then-bind.patch net-ethoc-enable-napi-before-poll-may-be-scheduled.patch net-ipv6-fix-calipso-causing-gpf-with-datagram-support.patch net-ping-do-not-abuse-udp_poll.patch net-stmmac-fix-completely-hung-tx-when-using-tso.patch net-systemport-fix-missing-wake-on-lan-interrupt-for-systemport-lite.patch ravb-fix-use-after-free-on-ifconfig-eth0-down.patch sock-reset-sk_err-when-the-error-queue-is-empty.patch sparc-machine-description-indices-can-vary.patch sparc-mm-hugepages-fix-setup_hugepagesz-for-invalid-values.patch sparc64-add-__multi3-for-gcc-7.x-and-later.patch sparc64-add-per-cpu-mm-of-secondary-contexts.patch sparc64-combine-activate_mm-and-switch_mm.patch sparc64-delete-old-wrap-code.patch sparc64-mm-fix-copy_tsb-to-correctly-copy-huge-page-tsbs.patch sparc64-new-context-wrap.patch sparc64-redefine-first-version.patch sparc64-reset-mm-cpumask-after-wrap.patch tcp-disallow-cwnd-undo-when-switching-congestion-control.patch vxlan-eliminate-cached-dst-leak.patch vxlan-fix-use-after-free-on-deletion.patch --- diff --git a/queue-4.11/arch-sparc-support-nr_cpus-4096.patch b/queue-4.11/arch-sparc-support-nr_cpus-4096.patch new file mode 100644 index 00000000000..f8e60c918b6 --- /dev/null +++ b/queue-4.11/arch-sparc-support-nr_cpus-4096.patch @@ -0,0 +1,79 @@ +From foo@baz Thu Jun 8 08:58:43 CEST 2017 +From: Jane Chu +Date: Tue, 6 Jun 2017 14:32:29 -0600 +Subject: arch/sparc: support NR_CPUS = 4096 + +From: Jane Chu + + +[ Upstream commit c79a13734d104b5b147d7cb0870276ccdd660dae ] + +Linux SPARC64 limits NR_CPUS to 4064 because init_cpu_send_mondo_info() +only allocates a single page for NR_CPUS mondo entries. Thus we cannot +use all 4096 CPUs on some SPARC platforms. + +To fix, allocate (2^order) pages where order is set according to the size +of cpu_list for possible cpus. Since cpu_list_pa and cpu_mondo_block_pa +are not used in asm code, there are no imm13 offsets from the base PA +that will break because they can only reach one page. + +Orabug: 25505750 + +Signed-off-by: Jane Chu + +Reviewed-by: Bob Picco +Reviewed-by: Atish Patra +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/Kconfig | 4 ++-- + arch/sparc/kernel/irq_64.c | 17 +++++++++++++---- + 2 files changed, 15 insertions(+), 6 deletions(-) + +--- a/arch/sparc/Kconfig ++++ b/arch/sparc/Kconfig +@@ -192,9 +192,9 @@ config NR_CPUS + int "Maximum number of CPUs" + depends on SMP + range 2 32 if SPARC32 +- range 2 1024 if SPARC64 ++ range 2 4096 if SPARC64 + default 32 if SPARC32 +- default 64 if SPARC64 ++ default 4096 if SPARC64 + + source kernel/Kconfig.hz + +--- a/arch/sparc/kernel/irq_64.c ++++ b/arch/sparc/kernel/irq_64.c +@@ -1034,17 +1034,26 @@ static void __init init_cpu_send_mondo_i + { + #ifdef CONFIG_SMP + unsigned long page; ++ void *mondo, *p; + +- BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); ++ BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > PAGE_SIZE); ++ ++ /* Make sure mondo block is 64byte aligned */ ++ p = kzalloc(127, GFP_KERNEL); ++ if (!p) { ++ prom_printf("SUN4V: Error, cannot allocate mondo block.\n"); ++ prom_halt(); ++ } ++ mondo = (void *)(((unsigned long)p + 63) & ~0x3f); ++ tb->cpu_mondo_block_pa = __pa(mondo); + + page = get_zeroed_page(GFP_KERNEL); + if (!page) { +- prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); ++ prom_printf("SUN4V: Error, cannot allocate cpu list page.\n"); + prom_halt(); + } + +- tb->cpu_mondo_block_pa = __pa(page); +- tb->cpu_list_pa = __pa(page + 64); ++ tb->cpu_list_pa = __pa(page); + #endif + } + diff --git a/queue-4.11/bnx2x-fix-multi-cos.patch b/queue-4.11/bnx2x-fix-multi-cos.patch new file mode 100644 index 00000000000..4e36215a6c6 --- /dev/null +++ b/queue-4.11/bnx2x-fix-multi-cos.patch @@ -0,0 +1,38 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: "Mintz, Yuval" +Date: Thu, 1 Jun 2017 15:57:56 +0300 +Subject: bnx2x: Fix Multi-Cos + +From: "Mintz, Yuval" + + +[ Upstream commit 3968d38917eb9bd0cd391265f6c9c538d9b33ffa ] + +Apparently multi-cos isn't working for bnx2x quite some time - +driver implements ndo_select_queue() to allow queue-selection +for FCoE, but the regular L2 flow would cause it to modulo the +fallback's result by the number of queues. +The fallback would return a queue matching the needed tc +[via __skb_tx_hash()], but since the modulo is by the number of TSS +queues where number of TCs is not accounted, transmission would always +be done by a queue configured into using TC0. + +Fixes: ada7c19e6d27 ("bnx2x: use XPS if possible for bnx2x_select_queue instead of pure hash") +Signed-off-by: Yuval Mintz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -1926,7 +1926,7 @@ u16 bnx2x_select_queue(struct net_device + } + + /* select a non-FCoE queue */ +- return fallback(dev, skb) % BNX2X_NUM_ETH_QUEUES(bp); ++ return fallback(dev, skb) % (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos); + } + + void bnx2x_set_num_queues(struct bnx2x *bp) diff --git a/queue-4.11/cxgb4-avoid-enabling-napi-twice-to-the-same-queue.patch b/queue-4.11/cxgb4-avoid-enabling-napi-twice-to-the-same-queue.patch new file mode 100644 index 00000000000..cb6758fa15e --- /dev/null +++ b/queue-4.11/cxgb4-avoid-enabling-napi-twice-to-the-same-queue.patch @@ -0,0 +1,38 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Ganesh Goudar +Date: Wed, 31 May 2017 18:26:28 +0530 +Subject: cxgb4: avoid enabling napi twice to the same queue + +From: Ganesh Goudar + + +[ Upstream commit e7519f9926f1d0d11c776eb0475eb098c7760f68 ] + +Take uld mutex to avoid race between cxgb_up() and +cxgb4_register_uld() to enable napi for the same uld +queue. + +Signed-off-by: Ganesh Goudar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +@@ -2217,10 +2217,14 @@ static int cxgb_up(struct adapter *adap) + if (err) + goto irq_err; + } ++ ++ mutex_lock(&uld_mutex); + enable_rx(adap); + t4_sge_start(adap); + t4_intr_enable(adap); + adap->flags |= FULL_INIT_DONE; ++ mutex_unlock(&uld_mutex); ++ + notify_ulds(adap, CXGB4_STATE_UP); + #if IS_ENABLED(CONFIG_IPV6) + update_clip(adap); diff --git a/queue-4.11/geneve-fix-needed_headroom-and-max_mtu-for-collect_metadata.patch b/queue-4.11/geneve-fix-needed_headroom-and-max_mtu-for-collect_metadata.patch new file mode 100644 index 00000000000..13924caedf8 --- /dev/null +++ b/queue-4.11/geneve-fix-needed_headroom-and-max_mtu-for-collect_metadata.patch @@ -0,0 +1,39 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Eric Garver +Date: Fri, 2 Jun 2017 14:54:10 -0400 +Subject: geneve: fix needed_headroom and max_mtu for collect_metadata + +From: Eric Garver + + +[ Upstream commit 9a1c44d989bff4c992b8b9a112d9fda275ea5515 ] + +Since commit 9b4437a5b870 ("geneve: Unify LWT and netdev handling.") +when using COLLECT_METADATA geneve devices are created with too small of +a needed_headroom and too large of a max_mtu. This is because +ip_tunnel_info_af() is not valid with the device level info when using +COLLECT_METADATA and we mistakenly fall into the IPv4 case. + +For COLLECT_METADATA, always use the worst case of ipv6 since both +sockets are created. + +Fixes: 9b4437a5b870 ("geneve: Unify LWT and netdev handling.") +Signed-off-by: Eric Garver +Acked-by: Pravin B Shelar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/geneve.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -1133,7 +1133,7 @@ static int geneve_configure(struct net * + + /* make enough headroom for basic scenario */ + encap_len = GENEVE_BASE_HLEN + ETH_HLEN; +- if (ip_tunnel_info_af(info) == AF_INET) { ++ if (!metadata && ip_tunnel_info_af(info) == AF_INET) { + encap_len += sizeof(struct iphdr); + dev->max_mtu -= sizeof(struct iphdr); + } else { diff --git a/queue-4.11/ip6_tunnel-fix-traffic-class-routing-for-tunnels.patch b/queue-4.11/ip6_tunnel-fix-traffic-class-routing-for-tunnels.patch new file mode 100644 index 00000000000..2aa4967b084 --- /dev/null +++ b/queue-4.11/ip6_tunnel-fix-traffic-class-routing-for-tunnels.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Liam McBirnie +Date: Thu, 1 Jun 2017 15:36:01 +1000 +Subject: ip6_tunnel: fix traffic class routing for tunnels + +From: Liam McBirnie + + +[ Upstream commit 5f733ee68f9a4df94775299ac6a7ab260704f6ed ] + +ip6_route_output() requires that the flowlabel contains the traffic +class for policy routing. + +Commit 0e9a709560db ("ip6_tunnel, ip6_gre: fix setting of DSCP on +encapsulated packets") removed the code which previously added the +traffic class to the flowlabel. + +The traffic class is added here because only route lookup needs the +flowlabel to contain the traffic class. + +Fixes: 0e9a709560db ("ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated packets") +Signed-off-by: Liam McBirnie +Acked-by: Peter Dawson +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1095,6 +1095,9 @@ int ip6_tnl_xmit(struct sk_buff *skb, st + + if (!dst) { + route_lookup: ++ /* add dsfield to flowlabel for route lookup */ ++ fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel); ++ + dst = ip6_route_output(net, NULL, fl6); + + if (dst->error) diff --git a/queue-4.11/ipv6-fix-leak-in-ipv6_gso_segment.patch b/queue-4.11/ipv6-fix-leak-in-ipv6_gso_segment.patch new file mode 100644 index 00000000000..5760c9ee913 --- /dev/null +++ b/queue-4.11/ipv6-fix-leak-in-ipv6_gso_segment.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: "David S. Miller" +Date: Sun, 4 Jun 2017 21:41:10 -0400 +Subject: ipv6: Fix leak in ipv6_gso_segment(). + +From: "David S. Miller" + + +[ Upstream commit e3e86b5119f81e5e2499bea7ea1ebe8ac6aab789 ] + +If ip6_find_1stfragopt() fails and we return an error we have to free +up 'segs' because nobody else is going to. + +Fixes: 2423496af35d ("ipv6: Prevent overrun when parsing v6 header options") +Reported-by: Ben Hutchings +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_offload.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -116,8 +116,10 @@ static struct sk_buff *ipv6_gso_segment( + + if (udpfrag) { + int err = ip6_find_1stfragopt(skb, &prevhdr); +- if (err < 0) ++ if (err < 0) { ++ kfree_skb_list(segs); + return ERR_PTR(err); ++ } + fptr = (struct frag_hdr *)((u8 *)ipv6h + err); + fptr->frag_off = htons(offset); + if (skb->next) diff --git a/queue-4.11/ipv6-xfrm-handle-errors-reported-by-xfrm6_find_1stfragopt.patch b/queue-4.11/ipv6-xfrm-handle-errors-reported-by-xfrm6_find_1stfragopt.patch new file mode 100644 index 00000000000..449d8e2b9d8 --- /dev/null +++ b/queue-4.11/ipv6-xfrm-handle-errors-reported-by-xfrm6_find_1stfragopt.patch @@ -0,0 +1,45 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Ben Hutchings +Date: Wed, 31 May 2017 13:15:41 +0100 +Subject: ipv6: xfrm: Handle errors reported by xfrm6_find_1stfragopt() + +From: Ben Hutchings + + +[ Upstream commit 6e80ac5cc992ab6256c3dae87f7e57db15e1a58c ] + +xfrm6_find_1stfragopt() may now return an error code and we must +not treat it as a length. + +Fixes: 2423496af35d ("ipv6: Prevent overrun when parsing v6 header options") +Signed-off-by: Ben Hutchings +Acked-by: Craig Gallek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/xfrm6_mode_ro.c | 2 ++ + net/ipv6/xfrm6_mode_transport.c | 2 ++ + 2 files changed, 4 insertions(+) + +--- a/net/ipv6/xfrm6_mode_ro.c ++++ b/net/ipv6/xfrm6_mode_ro.c +@@ -47,6 +47,8 @@ static int xfrm6_ro_output(struct xfrm_s + iph = ipv6_hdr(skb); + + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); ++ if (hdr_len < 0) ++ return hdr_len; + skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; +--- a/net/ipv6/xfrm6_mode_transport.c ++++ b/net/ipv6/xfrm6_mode_transport.c +@@ -28,6 +28,8 @@ static int xfrm6_transport_output(struct + iph = ipv6_hdr(skb); + + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); ++ if (hdr_len < 0) ++ return hdr_len; + skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; diff --git a/queue-4.11/net-bridge-fix-a-null-pointer-dereference-in-br_afspec.patch b/queue-4.11/net-bridge-fix-a-null-pointer-dereference-in-br_afspec.patch new file mode 100644 index 00000000000..aac7983d6e7 --- /dev/null +++ b/queue-4.11/net-bridge-fix-a-null-pointer-dereference-in-br_afspec.patch @@ -0,0 +1,36 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Nikolay Aleksandrov +Date: Tue, 6 Jun 2017 01:26:24 +0300 +Subject: net: bridge: fix a null pointer dereference in br_afspec + +From: Nikolay Aleksandrov + + +[ Upstream commit 1020ce3108cc26fbf09d70550ea2937cb1a211d2 ] + +We might call br_afspec() with p == NULL which is a valid use case if +the action is on the bridge device itself, but the bridge tunnel code +dereferences the p pointer without checking, so check if p is null +first. + +Reported-by: Gustavo A. R. Silva +Fixes: efa5356b0d97 ("bridge: per vlan dst_metadata netlink support") +Signed-off-by: Nikolay Aleksandrov +Acked-by: Roopa Prabhu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_netlink.c ++++ b/net/bridge/br_netlink.c +@@ -591,7 +591,7 @@ static int br_afspec(struct net_bridge * + err = 0; + switch (nla_type(attr)) { + case IFLA_BRIDGE_VLAN_TUNNEL_INFO: +- if (!(p->flags & BR_VLAN_TUNNEL)) ++ if (!p || !(p->flags & BR_VLAN_TUNNEL)) + return -EINVAL; + err = br_parse_vlan_tunnel_info(attr, &tinfo_curr); + if (err) diff --git a/queue-4.11/net-bridge-start-hello-timer-only-if-device-is-up.patch b/queue-4.11/net-bridge-start-hello-timer-only-if-device-is-up.patch new file mode 100644 index 00000000000..109955547f3 --- /dev/null +++ b/queue-4.11/net-bridge-start-hello-timer-only-if-device-is-up.patch @@ -0,0 +1,44 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Nikolay Aleksandrov +Date: Thu, 1 Jun 2017 18:07:55 +0300 +Subject: net: bridge: start hello timer only if device is up + +From: Nikolay Aleksandrov + + +[ Upstream commit aeb073241fe7a2b932e04e20c60e47718332877f ] + +When the transition of NO_STP -> KERNEL_STP was fixed by always calling +mod_timer in br_stp_start, it introduced a new regression which causes +the timer to be armed even when the bridge is down, and since we stop +the timers in its ndo_stop() function, they never get disabled if the +device is destroyed before it's upped. + +To reproduce: +$ while :; do ip l add br0 type bridge hello_time 100; brctl stp br0 on; +ip l del br0; done; + +CC: Xin Long +CC: Ivan Vecera +CC: Sebastian Ott +Reported-by: Sebastian Ott +Fixes: 6d18c732b95c ("bridge: start hello_timer when enabling KERNEL_STP in br_stp_start") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_stp_if.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/bridge/br_stp_if.c ++++ b/net/bridge/br_stp_if.c +@@ -179,7 +179,8 @@ static void br_stp_start(struct net_brid + br_debug(br, "using kernel STP\n"); + + /* To start timers on any ports left in blocking */ +- mod_timer(&br->hello_timer, jiffies + br->hello_time); ++ if (br->dev->flags & IFF_UP) ++ mod_timer(&br->hello_timer, jiffies + br->hello_time); + br_port_state_selection(br); + } + diff --git a/queue-4.11/net-dsa-fix-stale-cpu_switch-reference-after-unbind-then-bind.patch b/queue-4.11/net-dsa-fix-stale-cpu_switch-reference-after-unbind-then-bind.patch new file mode 100644 index 00000000000..3cec40607ff --- /dev/null +++ b/queue-4.11/net-dsa-fix-stale-cpu_switch-reference-after-unbind-then-bind.patch @@ -0,0 +1,48 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Florian Fainelli +Date: Fri, 2 Jun 2017 22:05:23 -0700 +Subject: net: dsa: Fix stale cpu_switch reference after unbind then bind + +From: Florian Fainelli + + +[ Upstream commit b07ac9894644202614ca87c69f3f45e424a82fef ] + +Commit 9520ed8fb841 ("net: dsa: use cpu_switch instead of ds[0]") +replaced the use of dst->ds[0] with dst->cpu_switch since that is +functionally equivalent, however, we can now run into an use after free +scenario after unbinding then rebinding the switch driver. + +The use after free happens because we do correctly initialize +dst->cpu_switch the first time we probe in dsa_cpu_parse(), then we +unbind the driver: dsa_dst_unapply() is called, and we rebind again. +dst->cpu_switch now points to a freed "ds" structure, and so when we +finally dereference it in dsa_cpu_port_ethtool_setup(), we oops. 
+ +To fix this, simply set dst->cpu_switch to NULL in dsa_dst_unapply() +which guarantees that we always correctly re-assign dst->cpu_switch in +dsa_cpu_parse(). + +Fixes: 9520ed8fb841 ("net: dsa: use cpu_switch instead of ds[0]") +Signed-off-by: Florian Fainelli +Reviewed-by: Vivien Didelot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/dsa2.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/dsa/dsa2.c ++++ b/net/dsa/dsa2.c +@@ -440,8 +440,10 @@ static void dsa_dst_unapply(struct dsa_s + dsa_ds_unapply(dst, ds); + } + +- if (dst->cpu_switch) ++ if (dst->cpu_switch) { + dsa_cpu_port_ethtool_restore(dst->cpu_switch); ++ dst->cpu_switch = NULL; ++ } + + pr_info("DSA: tree %d unapplied\n", dst->tree); + dst->applied = false; diff --git a/queue-4.11/net-ethoc-enable-napi-before-poll-may-be-scheduled.patch b/queue-4.11/net-ethoc-enable-napi-before-poll-may-be-scheduled.patch new file mode 100644 index 00000000000..841390031c0 --- /dev/null +++ b/queue-4.11/net-ethoc-enable-napi-before-poll-may-be-scheduled.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Max Filippov +Date: Mon, 5 Jun 2017 18:31:16 -0700 +Subject: net: ethoc: enable NAPI before poll may be scheduled + +From: Max Filippov + + +[ Upstream commit d220b942a4b6a0640aee78841608f4aa5e8e185e ] + +ethoc_reset enables device interrupts, ethoc_interrupt may schedule a +NAPI poll before NAPI is enabled in the ethoc_open, which results in +device being unable to send or receive anything until it's closed and +reopened. In case the device is flooded with ingress packets it may be +unable to recover at all. +Move napi_enable above ethoc_reset in the ethoc_open to fix that. + +Fixes: a1702857724f ("net: Add support for the OpenCores 10/100 Mbps Ethernet MAC.") +Signed-off-by: Max Filippov +Reviewed-by: Tobias Klauser +Reviewed-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ethoc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/ethoc.c ++++ b/drivers/net/ethernet/ethoc.c +@@ -739,6 +739,8 @@ static int ethoc_open(struct net_device + if (ret) + return ret; + ++ napi_enable(&priv->napi); ++ + ethoc_init_ring(priv, dev->mem_start); + ethoc_reset(priv); + +@@ -754,7 +756,6 @@ static int ethoc_open(struct net_device + priv->old_duplex = -1; + + phy_start(dev->phydev); +- napi_enable(&priv->napi); + + if (netif_msg_ifup(priv)) { + dev_info(&dev->dev, "I/O: %08lx Memory: %08lx-%08lx\n", diff --git a/queue-4.11/net-ipv6-fix-calipso-causing-gpf-with-datagram-support.patch b/queue-4.11/net-ipv6-fix-calipso-causing-gpf-with-datagram-support.patch new file mode 100644 index 00000000000..016f5088a88 --- /dev/null +++ b/queue-4.11/net-ipv6-fix-calipso-causing-gpf-with-datagram-support.patch @@ -0,0 +1,53 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Richard Haines +Date: Mon, 5 Jun 2017 16:44:40 +0100 +Subject: net/ipv6: Fix CALIPSO causing GPF with datagram support + +From: Richard Haines + + +[ Upstream commit e3ebdb20fddacded2740a333ff66781e0d28b05c ] + +When using CALIPSO with IPPROTO_UDP it is possible to trigger a GPF as the +IP header may have moved. + +Also update the payload length after adding the CALIPSO option. + +Signed-off-by: Richard Haines +Acked-by: Paul Moore +Signed-off-by: Huw Davies +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/calipso.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/net/ipv6/calipso.c ++++ b/net/ipv6/calipso.c +@@ -1319,7 +1319,7 @@ static int calipso_skbuff_setattr(struct + struct ipv6hdr *ip6_hdr; + struct ipv6_opt_hdr *hop; + unsigned char buf[CALIPSO_MAX_BUFFER]; +- int len_delta, new_end, pad; ++ int len_delta, new_end, pad, payload; + unsigned int start, end; + + ip6_hdr = ipv6_hdr(skb); +@@ -1346,6 +1346,8 @@ static int calipso_skbuff_setattr(struct + if (ret_val < 0) + return ret_val; + ++ ip6_hdr = ipv6_hdr(skb); /* Reset as skb_cow() may have moved it */ ++ + if (len_delta) { + if (len_delta > 0) + skb_push(skb, len_delta); +@@ -1355,6 +1357,8 @@ static int calipso_skbuff_setattr(struct + sizeof(*ip6_hdr) + start); + skb_reset_network_header(skb); + ip6_hdr = ipv6_hdr(skb); ++ payload = ntohs(ip6_hdr->payload_len); ++ ip6_hdr->payload_len = htons(payload + len_delta); + } + + hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); diff --git a/queue-4.11/net-ping-do-not-abuse-udp_poll.patch b/queue-4.11/net-ping-do-not-abuse-udp_poll.patch new file mode 100644 index 00000000000..9c4b3894d79 --- /dev/null +++ b/queue-4.11/net-ping-do-not-abuse-udp_poll.patch @@ -0,0 +1,76 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Eric Dumazet +Date: Sat, 3 Jun 2017 09:29:25 -0700 +Subject: net: ping: do not abuse udp_poll() + +From: Eric Dumazet + + +[ Upstream commit 77d4b1d36926a9b8387c6b53eeba42bcaaffcea3 ] + +Alexander reported various KASAN messages triggered in recent kernels + +The problem is that ping sockets should not use udp_poll() in the first +place, and recent changes in UDP stack finally exposed this old bug. + +Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") +Fixes: 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.") +Signed-off-by: Eric Dumazet +Reported-by: Sasha Levin +Cc: Solar Designer +Cc: Vasiliy Kulikov +Cc: Lorenzo Colitti +Acked-By: Lorenzo Colitti +Tested-By: Lorenzo Colitti +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ipv6.h | 1 + + net/ipv4/af_inet.c | 2 +- + net/ipv6/ping.c | 2 +- + net/ipv6/raw.c | 2 +- + 4 files changed, 4 insertions(+), 3 deletions(-) + +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -1007,6 +1007,7 @@ int inet6_hash_connect(struct inet_timew + */ + extern const struct proto_ops inet6_stream_ops; + extern const struct proto_ops inet6_dgram_ops; ++extern const struct proto_ops inet6_sockraw_ops; + + struct group_source_req; + struct group_filter; +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -1043,7 +1043,7 @@ static struct inet_protosw inetsw_array[ + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMP, + .prot = &ping_prot, +- .ops = &inet_dgram_ops, ++ .ops = &inet_sockraw_ops, + .flags = INET_PROTOSW_REUSE, + }, + +--- a/net/ipv6/ping.c ++++ b/net/ipv6/ping.c +@@ -192,7 +192,7 @@ static struct inet_protosw pingv6_protos + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, +- .ops = &inet6_dgram_ops, ++ .ops = &inet6_sockraw_ops, + .flags = INET_PROTOSW_REUSE, + }; + +--- a/net/ipv6/raw.c ++++ b/net/ipv6/raw.c +@@ -1338,7 +1338,7 @@ void raw6_proc_exit(void) + #endif /* CONFIG_PROC_FS */ + + /* Same as inet6_dgram_ops, sans udp_poll. 
*/ +-static const struct proto_ops inet6_sockraw_ops = { ++const struct proto_ops inet6_sockraw_ops = { + .family = PF_INET6, + .owner = THIS_MODULE, + .release = inet6_release, diff --git a/queue-4.11/net-stmmac-fix-completely-hung-tx-when-using-tso.patch b/queue-4.11/net-stmmac-fix-completely-hung-tx-when-using-tso.patch new file mode 100644 index 00000000000..12c8f0c7112 --- /dev/null +++ b/queue-4.11/net-stmmac-fix-completely-hung-tx-when-using-tso.patch @@ -0,0 +1,44 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Niklas Cassel +Date: Tue, 6 Jun 2017 09:25:00 +0200 +Subject: net: stmmac: fix completely hung TX when using TSO + +From: Niklas Cassel + + +[ Upstream commit 426849e6611f2092553f8d53372ae310818a6292 ] + +stmmac_tso_allocator can fail to set the Last Descriptor bit +on a descriptor that actually was the last descriptor. + +This happens when the buffer of the last descriptor ends +up having a size of exactly TSO_MAX_BUFF_SIZE. + +When the IP eventually reaches the next last descriptor, +which actually has the bit set, the DMA will hang. + +When the DMA hangs, we get a tx timeout, however, +since stmmac does not do a complete reset of the IP +in stmmac_tx_timeout, we end up in a state with +completely hung TX. + +Signed-off-by: Niklas Cassel +Acked-by: Giuseppe Cavallaro +Acked-by: Alexandre TORGUE +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -1950,7 +1950,7 @@ static void stmmac_tso_allocator(struct + + priv->hw->desc->prepare_tso_tx_desc(desc, 0, buff_size, + 0, 1, +- (last_segment) && (buff_size < TSO_MAX_BUFF_SIZE), ++ (last_segment) && (tmp_len <= TSO_MAX_BUFF_SIZE), + 0, 0); + + tmp_len -= TSO_MAX_BUFF_SIZE; diff --git a/queue-4.11/net-systemport-fix-missing-wake-on-lan-interrupt-for-systemport-lite.patch b/queue-4.11/net-systemport-fix-missing-wake-on-lan-interrupt-for-systemport-lite.patch new file mode 100644 index 00000000000..aec019fde64 --- /dev/null +++ b/queue-4.11/net-systemport-fix-missing-wake-on-lan-interrupt-for-systemport-lite.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Florian Fainelli +Date: Thu, 1 Jun 2017 18:02:39 -0700 +Subject: net: systemport: Fix missing Wake-on-LAN interrupt for SYSTEMPORT Lite + +From: Florian Fainelli + + +[ Upstream commit d31353cd753c443ace5723d6878a39f393a0c136 ] + +On SYSTEMPORT Lite, since we have the main interrupt source in the first +cell, the second cell is the Wake-on-LAN interrupt, yet the code was not +properly updated to fetch the second cell, and instead looked at the +third and non-existing cell for Wake-on-LAN. + +Fixes: 44a4524c54af ("net: systemport: Add support for SYSTEMPORT Lite") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bcmsysport.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bcmsysport.c ++++ b/drivers/net/ethernet/broadcom/bcmsysport.c +@@ -1968,9 +1968,12 @@ static int bcm_sysport_probe(struct plat + priv->num_rx_desc_words = params->num_rx_desc_words; + + priv->irq0 = platform_get_irq(pdev, 0); +- if (!priv->is_lite) ++ if (!priv->is_lite) { + priv->irq1 = platform_get_irq(pdev, 1); +- priv->wol_irq = platform_get_irq(pdev, 2); ++ priv->wol_irq = platform_get_irq(pdev, 2); ++ } else { ++ priv->wol_irq = platform_get_irq(pdev, 1); ++ } + if (priv->irq0 <= 0 || (priv->irq1 <= 0 && !priv->is_lite)) { + dev_err(&pdev->dev, "invalid interrupts\n"); + ret = -EINVAL; diff --git a/queue-4.11/ravb-fix-use-after-free-on-ifconfig-eth0-down.patch b/queue-4.11/ravb-fix-use-after-free-on-ifconfig-eth0-down.patch new file mode 100644 index 00000000000..5e807402378 --- /dev/null +++ b/queue-4.11/ravb-fix-use-after-free-on-ifconfig-eth0-down.patch @@ -0,0 +1,130 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Eugeniu Rosca +Date: Tue, 6 Jun 2017 00:08:10 +0200 +Subject: ravb: Fix use-after-free on `ifconfig eth0 down` + +From: Eugeniu Rosca + + +[ Upstream commit 79514ef670e9e575a1fe36922268c439d0f0ca8a ] + +Commit a47b70ea86bd ("ravb: unmap descriptors when freeing rings") has +introduced the issue seen in [1] reproduced on H3ULCB board. + +Fix this by relocating the RX skb ringbuffer free operation, so that +swiotlb page unmapping can be done first. Freeing of aligned TX buffers +is not relevant to the issue seen in [1]. Still, reposition TX free +calls as well, to have all kfree() operations performed consistently +_after_ dma_unmap_*()/dma_free_*(). 
+ +[1] Console screenshot with the problem reproduced: + +salvator-x login: root +root@salvator-x:~# ifconfig eth0 up +Micrel KSZ9031 Gigabit PHY e6800000.ethernet-ffffffff:00: \ + attached PHY driver [Micrel KSZ9031 Gigabit PHY] \ + (mii_bus:phy_addr=e6800000.ethernet-ffffffff:00, irq=235) +IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready +root@salvator-x:~# +root@salvator-x:~# ifconfig eth0 down + +================================================================== +BUG: KASAN: use-after-free in swiotlb_tbl_unmap_single+0xc4/0x35c +Write of size 1538 at addr ffff8006d884f780 by task ifconfig/1649 + +CPU: 0 PID: 1649 Comm: ifconfig Not tainted 4.12.0-rc4-00004-g112eb07287d1 #32 +Hardware name: Renesas H3ULCB board based on r8a7795 (DT) +Call trace: +[] dump_backtrace+0x0/0x3a4 +[] show_stack+0x14/0x1c +[] dump_stack+0xf8/0x150 +[] print_address_description+0x7c/0x330 +[] kasan_report+0x2e0/0x2f4 +[] check_memory_region+0x20/0x14c +[] memcpy+0x48/0x68 +[] swiotlb_tbl_unmap_single+0xc4/0x35c +[] unmap_single+0x90/0xa4 +[] swiotlb_unmap_page+0xc/0x14 +[] __swiotlb_unmap_page+0xcc/0xe4 +[] ravb_ring_free+0x514/0x870 +[] ravb_close+0x288/0x36c +[] __dev_close_many+0x14c/0x174 +[] __dev_close+0xc8/0x144 +[] __dev_change_flags+0xd8/0x194 +[] dev_change_flags+0x60/0xb0 +[] devinet_ioctl+0x484/0x9d4 +[] inet_ioctl+0x190/0x194 +[] sock_do_ioctl+0x78/0xa8 +[] sock_ioctl+0x110/0x3c4 +[] vfs_ioctl+0x90/0xa0 +[] do_vfs_ioctl+0x148/0xc38 +[] SyS_ioctl+0x44/0x74 +[] el0_svc_naked+0x24/0x28 + +The buggy address belongs to the page: +page:ffff7e001b6213c0 count:0 mapcount:0 mapping: (null) index:0x0 +flags: 0x4000000000000000() +raw: 4000000000000000 0000000000000000 0000000000000000 00000000ffffffff +raw: 0000000000000000 ffff7e001b6213e0 0000000000000000 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8006d884f680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff8006d884f700: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff +>ffff8006d884f780: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ^ + ffff8006d884f800: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff8006d884f880: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff +================================================================== +Disabling lock debugging due to kernel taint +root@salvator-x:~# + +Fixes: a47b70ea86bd ("ravb: unmap descriptors when freeing rings") +Signed-off-by: Eugeniu Rosca +Acked-by: Sergei Shtylyov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/renesas/ravb_main.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -230,18 +230,6 @@ static void ravb_ring_free(struct net_de + int ring_size; + int i; + +- /* Free RX skb ringbuffer */ +- if (priv->rx_skb[q]) { +- for (i = 0; i < priv->num_rx_ring[q]; i++) +- dev_kfree_skb(priv->rx_skb[q][i]); +- } +- kfree(priv->rx_skb[q]); +- priv->rx_skb[q] = NULL; +- +- /* Free aligned TX buffers */ +- kfree(priv->tx_align[q]); +- priv->tx_align[q] = NULL; +- + if (priv->rx_ring[q]) { + for (i = 0; i < priv->num_rx_ring[q]; i++) { + struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i]; +@@ -270,6 +258,18 @@ static void ravb_ring_free(struct net_de + priv->tx_ring[q] = NULL; + } + ++ /* Free RX skb ringbuffer */ ++ if (priv->rx_skb[q]) { ++ for (i = 0; i < priv->num_rx_ring[q]; i++) ++ dev_kfree_skb(priv->rx_skb[q][i]); ++ } ++ kfree(priv->rx_skb[q]); ++ priv->rx_skb[q] = NULL; ++ ++ /* Free aligned TX buffers */ ++ kfree(priv->tx_align[q]); ++ priv->tx_align[q] = NULL; ++ + /* Free TX skb ringbuffer. + * SKBs are freed by ravb_tx_free() call above. + */ diff --git a/queue-4.11/series b/queue-4.11/series new file mode 100644 index 00000000000..836d71eb8e8 --- /dev/null +++ b/queue-4.11/series @@ -0,0 +1,30 @@ +bnx2x-fix-multi-cos.patch +net-bridge-start-hello-timer-only-if-device-is-up.patch +vxlan-eliminate-cached-dst-leak.patch +net-systemport-fix-missing-wake-on-lan-interrupt-for-systemport-lite.patch +ipv6-xfrm-handle-errors-reported-by-xfrm6_find_1stfragopt.patch +cxgb4-avoid-enabling-napi-twice-to-the-same-queue.patch +tcp-disallow-cwnd-undo-when-switching-congestion-control.patch +vxlan-fix-use-after-free-on-deletion.patch +ip6_tunnel-fix-traffic-class-routing-for-tunnels.patch +sock-reset-sk_err-when-the-error-queue-is-empty.patch +geneve-fix-needed_headroom-and-max_mtu-for-collect_metadata.patch +ipv6-fix-leak-in-ipv6_gso_segment.patch +net-dsa-fix-stale-cpu_switch-reference-after-unbind-then-bind.patch +net-ping-do-not-abuse-udp_poll.patch +net-ipv6-fix-calipso-causing-gpf-with-datagram-support.patch +ravb-fix-use-after-free-on-ifconfig-eth0-down.patch +net-bridge-fix-a-null-pointer-dereference-in-br_afspec.patch +net-ethoc-enable-napi-before-poll-may-be-scheduled.patch +net-stmmac-fix-completely-hung-tx-when-using-tso.patch +sparc64-add-__multi3-for-gcc-7.x-and-later.patch +sparc64-mm-fix-copy_tsb-to-correctly-copy-huge-page-tsbs.patch +sparc-machine-description-indices-can-vary.patch +sparc-mm-hugepages-fix-setup_hugepagesz-for-invalid-values.patch +sparc64-reset-mm-cpumask-after-wrap.patch +sparc64-combine-activate_mm-and-switch_mm.patch +sparc64-redefine-first-version.patch +sparc64-add-per-cpu-mm-of-secondary-contexts.patch +sparc64-new-context-wrap.patch +sparc64-delete-old-wrap-code.patch +arch-sparc-support-nr_cpus-4096.patch diff --git a/queue-4.11/sock-reset-sk_err-when-the-error-queue-is-empty.patch b/queue-4.11/sock-reset-sk_err-when-the-error-queue-is-empty.patch new file mode 100644 index 00000000000..75a00e5a829 --- /dev/null +++ b/queue-4.11/sock-reset-sk_err-when-the-error-queue-is-empty.patch @@ -0,0 +1,47 @@ +From foo@baz Thu Jun 8 08:58:08 CEST 2017 +From: Soheil Hassas Yeganeh +Date: Fri, 2 Jun 2017 12:38:22 -0400 +Subject: sock: reset sk_err when the error queue is empty + +From: Soheil Hassas Yeganeh + + +[ Upstream commit 38b257938ac6655d0d6333743303231b9c465ec1 ] + 
+Prior to f5f99309fa74 (sock: do not set sk_err in +sock_dequeue_err_skb), sk_err was reset to the error of +the skb on the head of the error queue. + +Applications, most notably ping, are relying on this +behavior to reset sk_err for ICMP packets. + +Set sk_err to the ICMP error when there is an ICMP packet +at the head of the error queue. + +Fixes: f5f99309fa74 (sock: do not set sk_err in sock_dequeue_err_skb) +Reported-by: Cyril Hrubis +Tested-by: Cyril Hrubis +Signed-off-by: Soheil Hassas Yeganeh +Signed-off-by: Eric Dumazet +Signed-off-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3755,8 +3755,11 @@ struct sk_buff *sock_dequeue_err_skb(str + + spin_lock_irqsave(&q->lock, flags); + skb = __skb_dequeue(q); +- if (skb && (skb_next = skb_peek(q))) ++ if (skb && (skb_next = skb_peek(q))) { + icmp_next = is_icmp_err_skb(skb_next); ++ if (icmp_next) ++ sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin; ++ } + spin_unlock_irqrestore(&q->lock, flags); + + if (is_icmp_err_skb(skb) && !icmp_next) diff --git a/queue-4.11/sparc-machine-description-indices-can-vary.patch b/queue-4.11/sparc-machine-description-indices-can-vary.patch new file mode 100644 index 00000000000..94d2e467c28 --- /dev/null +++ b/queue-4.11/sparc-machine-description-indices-can-vary.patch @@ -0,0 +1,142 @@ +From foo@baz Thu Jun 8 08:58:43 CEST 2017 +From: James Clarke +Date: Mon, 29 May 2017 20:17:56 +0100 +Subject: sparc: Machine description indices can vary + +From: James Clarke + + +[ Upstream commit c982aa9c304bf0b9a7522fd118fed4afa5a0263c ] + +VIO devices were being looked up by their index in the machine +description node block, but this often varies over time as devices are +added and removed. Instead, store the ID and look up using the type, +config handle and ID. + +Signed-off-by: James Clarke +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112541 +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/vio.h | 1 + arch/sparc/kernel/vio.c | 68 ++++++++++++++++++++++++++++++++++++++++--- + 2 files changed, 65 insertions(+), 4 deletions(-) + +--- a/arch/sparc/include/asm/vio.h ++++ b/arch/sparc/include/asm/vio.h +@@ -327,6 +327,7 @@ struct vio_dev { + int compat_len; + + u64 dev_no; ++ u64 id; + + unsigned long channel_id; + +--- a/arch/sparc/kernel/vio.c ++++ b/arch/sparc/kernel/vio.c +@@ -302,13 +302,16 @@ static struct vio_dev *vio_create_one(st + if (!id) { + dev_set_name(&vdev->dev, "%s", bus_id_name); + vdev->dev_no = ~(u64)0; ++ vdev->id = ~(u64)0; + } else if (!cfg_handle) { + dev_set_name(&vdev->dev, "%s-%llu", bus_id_name, *id); + vdev->dev_no = *id; ++ vdev->id = ~(u64)0; + } else { + dev_set_name(&vdev->dev, "%s-%llu-%llu", bus_id_name, + *cfg_handle, *id); + vdev->dev_no = *cfg_handle; ++ vdev->id = *id; + } + + vdev->dev.parent = parent; +@@ -351,27 +354,84 @@ static void vio_add(struct mdesc_handle + (void) vio_create_one(hp, node, &root_vdev->dev); + } + ++struct vio_md_node_query { ++ const char *type; ++ u64 dev_no; ++ u64 id; ++}; ++ + static int vio_md_node_match(struct device *dev, void *arg) + { ++ struct vio_md_node_query *query = (struct vio_md_node_query *) arg; + struct vio_dev *vdev = to_vio_dev(dev); + +- if (vdev->mp == (u64) arg) +- return 1; ++ if (vdev->dev_no != query->dev_no) ++ return 0; ++ if (vdev->id != query->id) ++ return 0; ++ if (strcmp(vdev->type, query->type)) ++ return 0; + +- return 0; ++ return 1; + } + + static void vio_remove(struct mdesc_handle *hp, u64 node) + { ++ const char *type; ++ const u64 *id, *cfg_handle; ++ u64 a; ++ struct vio_md_node_query query; + struct device *dev; + +- dev = device_find_child(&root_vdev->dev, (void *) node, ++ type = mdesc_get_property(hp, node, "device-type", NULL); ++ if (!type) { ++ type = mdesc_get_property(hp, node, "name", NULL); ++ if (!type) ++ type = mdesc_node_name(hp, node); ++ } ++ ++ query.type = type; ++ ++ id = mdesc_get_property(hp, node, "id", NULL); ++ cfg_handle = NULL; ++ mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) { ++ u64 target; ++ ++ target = mdesc_arc_target(hp, a); ++ cfg_handle = mdesc_get_property(hp, target, ++ "cfg-handle", NULL); ++ if (cfg_handle) ++ break; ++ } ++ ++ if (!id) { ++ query.dev_no = ~(u64)0; ++ query.id = ~(u64)0; ++ } else if (!cfg_handle) { ++ query.dev_no = *id; ++ query.id = ~(u64)0; ++ } else { ++ query.dev_no = *cfg_handle; ++ query.id = *id; ++ } ++ ++ dev = device_find_child(&root_vdev->dev, &query, + vio_md_node_match); + if (dev) { + printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev)); + + device_unregister(dev); + put_device(dev); ++ } else { ++ if (!id) ++ printk(KERN_ERR "VIO: Removed unknown %s node.\n", ++ type); ++ else if (!cfg_handle) ++ printk(KERN_ERR "VIO: Removed unknown %s node %llu.\n", ++ type, *id); ++ else ++ printk(KERN_ERR "VIO: Removed unknown %s node %llu-%llu.\n", ++ type, *cfg_handle, *id); + } + } + diff --git a/queue-4.11/sparc-mm-hugepages-fix-setup_hugepagesz-for-invalid-values.patch b/queue-4.11/sparc-mm-hugepages-fix-setup_hugepagesz-for-invalid-values.patch new file mode 100644 index 00000000000..7ec3aa6e169 --- /dev/null +++ b/queue-4.11/sparc-mm-hugepages-fix-setup_hugepagesz-for-invalid-values.patch @@ -0,0 +1,32 @@ +From foo@baz Thu Jun 8 08:58:43 CEST 2017 +From: "Liam R. Howlett" +Date: Tue, 30 May 2017 15:45:00 -0400 +Subject: sparc/mm/hugepages: Fix setup_hugepagesz for invalid values. + +From: "Liam R. 
Howlett" + + +[ Upstream commit f322980b74a15e08f8c70a34a5864ecdbf957251 ] + +hugetlb_bad_size needs to be called on invalid values. Also change the +pr_warn to a pr_err to better align with other platforms. + +Signed-off-by: Liam R. Howlett +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/init_64.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -358,7 +358,8 @@ static int __init setup_hugepagesz(char + } + + if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) { +- pr_warn("hugepagesz=%llu not supported by MMU.\n", ++ hugetlb_bad_size(); ++ pr_err("hugepagesz=%llu not supported by MMU.\n", + hugepage_size); + goto out; + } diff --git a/queue-4.11/sparc64-add-__multi3-for-gcc-7.x-and-later.patch b/queue-4.11/sparc64-add-__multi3-for-gcc-7.x-and-later.patch new file mode 100644 index 00000000000..292cb94862b --- /dev/null +++ b/queue-4.11/sparc64-add-__multi3-for-gcc-7.x-and-later.patch @@ -0,0 +1,67 @@ +From foo@baz Thu Jun 8 08:58:43 CEST 2017 +From: "David S. Miller" +Date: Mon, 5 Jun 2017 11:28:57 -0700 +Subject: sparc64: Add __multi3 for gcc 7.x and later. + +From: "David S. Miller" + + +[ Upstream commit 1b4af13ff2cc6897557bb0b8d9e2fad4fa4d67aa ] + +Reported-by: Waldemar Brodkorb +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/Makefile | 1 + + arch/sparc/lib/multi3.S | 35 +++++++++++++++++++++++++++++++++++ + 2 files changed, 36 insertions(+) + create mode 100644 arch/sparc/lib/multi3.S + +--- a/arch/sparc/lib/Makefile ++++ b/arch/sparc/lib/Makefile +@@ -15,6 +15,7 @@ lib-$(CONFIG_SPARC32) += copy_user.o loc + lib-$(CONFIG_SPARC64) += atomic_64.o + lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o + lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o ++lib-$(CONFIG_SPARC64) += multi3.o + + lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o + lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o +--- /dev/null ++++ b/arch/sparc/lib/multi3.S +@@ -0,0 +1,35 @@ ++#include ++#include ++ ++ .text ++ .align 4 ++ENTRY(__multi3) /* %o0 = u, %o1 = v */ ++ mov %o1, %g1 ++ srl %o3, 0, %g4 ++ mulx %g4, %g1, %o1 ++ srlx %g1, 0x20, %g3 ++ mulx %g3, %g4, %g5 ++ sllx %g5, 0x20, %o5 ++ srl %g1, 0, %g4 ++ sub %o1, %o5, %o5 ++ srlx %o5, 0x20, %o5 ++ addcc %g5, %o5, %g5 ++ srlx %o3, 0x20, %o5 ++ mulx %g4, %o5, %g4 ++ mulx %g3, %o5, %o5 ++ sethi %hi(0x80000000), %g3 ++ addcc %g5, %g4, %g5 ++ srlx %g5, 0x20, %g5 ++ add %g3, %g3, %g3 ++ movcc %xcc, %g0, %g3 ++ addcc %o5, %g5, %o5 ++ sllx %g4, 0x20, %g4 ++ add %o1, %g4, %o1 ++ add %o5, %g3, %g2 ++ mulx %g1, %o2, %g1 ++ add %g1, %g2, %g1 ++ mulx %o0, %o3, %o0 ++ retl ++ add %g1, %o0, %o0 ++ENDPROC(__multi3) ++EXPORT_SYMBOL(__multi3) diff --git a/queue-4.11/sparc64-add-per-cpu-mm-of-secondary-contexts.patch b/queue-4.11/sparc64-add-per-cpu-mm-of-secondary-contexts.patch new file mode 100644 index 00000000000..024c6beea55 --- /dev/null +++ b/queue-4.11/sparc64-add-per-cpu-mm-of-secondary-contexts.patch @@ -0,0 +1,62 @@ +From foo@baz Thu Jun 8 08:58:43 CEST 2017 +From: Pavel Tatashin +Date: Wed, 31 May 2017 11:25:23 -0400 +Subject: sparc64: add per-cpu mm of secondary contexts + +From: Pavel Tatashin + + +[ Upstream commit 7a5b4bbf49fe86ce77488a70c5dccfe2d50d7a2d ] + +The new wrap is going to use information from this array to figure out +mm's that currently have valid secondary contexts setup. 
+ +Signed-off-by: Pavel Tatashin +Reviewed-by: Bob Picco +Reviewed-by: Steven Sistare +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/mmu_context_64.h | 5 +++-- + arch/sparc/mm/init_64.c | 1 + + 2 files changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/sparc/include/asm/mmu_context_64.h ++++ b/arch/sparc/include/asm/mmu_context_64.h +@@ -19,6 +19,7 @@ extern spinlock_t ctx_alloc_lock; + extern unsigned long tlb_context_cache; + extern unsigned long mmu_context_bmap[]; + ++DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm); + void get_new_mmu_context(struct mm_struct *mm); + #ifdef CONFIG_SMP + void smp_new_mmu_context_version(void); +@@ -76,8 +77,9 @@ void __flush_tlb_mm(unsigned long, unsig + static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) + { + unsigned long ctx_valid, flags; +- int cpu; ++ int cpu = smp_processor_id(); + ++ per_cpu(per_cpu_secondary_mm, cpu) = mm; + if (unlikely(mm == &init_mm)) + return; + +@@ -123,7 +125,6 @@ static inline void switch_mm(struct mm_s + * for the first time, we must flush that context out of the + * local TLB. + */ +- cpu = smp_processor_id(); + if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) { + cpumask_set_cpu(cpu, mm_cpumask(mm)); + __flush_tlb_mm(CTX_HWBITS(mm->context), +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -711,6 +711,7 @@ unsigned long tlb_context_cache = CTX_FI + #define MAX_CTX_NR (1UL << CTX_NR_BITS) + #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR) + DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); ++DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0}; + + /* Caller does TLB context flushing on local CPU if necessary. + * The caller also ensures that CTX_VALID(mm->context) is false. diff --git a/queue-4.11/sparc64-combine-activate_mm-and-switch_mm.patch b/queue-4.11/sparc64-combine-activate_mm-and-switch_mm.patch new file mode 100644 index 00000000000..9ef5e7bc6ec --- /dev/null +++ b/queue-4.11/sparc64-combine-activate_mm-and-switch_mm.patch @@ -0,0 +1,54 @@ +From foo@baz Thu Jun 8 08:58:43 CEST 2017 +From: Pavel Tatashin +Date: Wed, 31 May 2017 11:25:21 -0400 +Subject: sparc64: combine activate_mm and switch_mm + +From: Pavel Tatashin + + +[ Upstream commit 14d0334c6748ff2aedb3f2f7fdc51ee90a9b54e7 ] + +The only difference between these two functions is that in activate_mm we +unconditionally flush context. However, there is no need to keep this +difference after fixing a bug where cpumask was not reset on a wrap. So, in +this patch we combine these. + +Signed-off-by: Pavel Tatashin +Reviewed-by: Bob Picco +Reviewed-by: Steven Sistare +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/mmu_context_64.h | 21 +-------------------- + 1 file changed, 1 insertion(+), 20 deletions(-) + +--- a/arch/sparc/include/asm/mmu_context_64.h ++++ b/arch/sparc/include/asm/mmu_context_64.h +@@ -133,26 +133,7 @@ static inline void switch_mm(struct mm_s + } + + #define deactivate_mm(tsk,mm) do { } while (0) +- +-/* Activate a new MM instance for the current task. 
*/ +-static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm) +-{ +- unsigned long flags; +- int cpu; +- +- spin_lock_irqsave(&mm->context.lock, flags); +- if (!CTX_VALID(mm->context)) +- get_new_mmu_context(mm); +- cpu = smp_processor_id(); +- if (!cpumask_test_cpu(cpu, mm_cpumask(mm))) +- cpumask_set_cpu(cpu, mm_cpumask(mm)); +- +- load_secondary_context(mm); +- __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); +- tsb_context_switch(mm); +- spin_unlock_irqrestore(&mm->context.lock, flags); +-} +- ++#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL) + #endif /* !(__ASSEMBLY__) */ + + #endif /* !(__SPARC64_MMU_CONTEXT_H) */ diff --git a/queue-4.11/sparc64-delete-old-wrap-code.patch b/queue-4.11/sparc64-delete-old-wrap-code.patch new file mode 100644 index 00000000000..620fec978de --- /dev/null +++ b/queue-4.11/sparc64-delete-old-wrap-code.patch @@ -0,0 +1,128 @@ +From foo@baz Thu Jun 8 08:58:43 CEST 2017 +From: Pavel Tatashin +Date: Wed, 31 May 2017 11:25:25 -0400 +Subject: sparc64: delete old wrap code + +From: Pavel Tatashin + + +[ Upstream commit 0197e41ce70511dc3b71f7fefa1a676e2b5cd60b ] + +The old method that is using xcall and softint to get new context id is +deleted, as it is replaced by a method of using per_cpu_secondary_mm +without xcall to perform the context wrap. + +Signed-off-by: Pavel Tatashin +Reviewed-by: Bob Picco +Reviewed-by: Steven Sistare +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/mmu_context_64.h | 6 ------ + arch/sparc/include/asm/pil.h | 1 - + arch/sparc/kernel/kernel.h | 1 - + arch/sparc/kernel/smp_64.c | 31 ------------------------------- + arch/sparc/kernel/ttable_64.S | 2 +- + arch/sparc/mm/ultra.S | 5 ----- + 6 files changed, 1 insertion(+), 45 deletions(-) + +--- a/arch/sparc/include/asm/mmu_context_64.h ++++ b/arch/sparc/include/asm/mmu_context_64.h +@@ -21,12 +21,6 @@ extern unsigned long mmu_context_bmap[]; + + DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm); + void get_new_mmu_context(struct mm_struct *mm); +-#ifdef CONFIG_SMP +-void smp_new_mmu_context_version(void); +-#else +-#define smp_new_mmu_context_version() do { } while (0) +-#endif +- + int init_new_context(struct task_struct *tsk, struct mm_struct *mm); + void destroy_context(struct mm_struct *mm); + +--- a/arch/sparc/include/asm/pil.h ++++ b/arch/sparc/include/asm/pil.h +@@ -20,7 +20,6 @@ + #define PIL_SMP_CALL_FUNC 1 + #define PIL_SMP_RECEIVE_SIGNAL 2 + #define PIL_SMP_CAPTURE 3 +-#define PIL_SMP_CTX_NEW_VERSION 4 + #define PIL_DEVICE_IRQ 5 + #define PIL_SMP_CALL_FUNC_SNGL 6 + #define PIL_DEFERRED_PCR_WORK 7 +--- a/arch/sparc/kernel/kernel.h ++++ b/arch/sparc/kernel/kernel.h +@@ -37,7 +37,6 @@ void handle_stdfmna(struct pt_regs *regs + /* smp_64.c */ + void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs); + void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs); +-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs); + void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs); + void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs); + +--- a/arch/sparc/kernel/smp_64.c ++++ b/arch/sparc/kernel/smp_64.c +@@ -964,37 +964,6 @@ void flush_dcache_page_all(struct mm_str + preempt_enable(); + } + +-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) +-{ +- struct mm_struct *mm; +- unsigned long flags; +- +- clear_softint(1 << irq); +- +- /* See if we 
need to allocate a new TLB context because +- * the version of the one we are using is now out of date. +- */ +- mm = current->active_mm; +- if (unlikely(!mm || (mm == &init_mm))) +- return; +- +- spin_lock_irqsave(&mm->context.lock, flags); +- +- if (unlikely(!CTX_VALID(mm->context))) +- get_new_mmu_context(mm); +- +- spin_unlock_irqrestore(&mm->context.lock, flags); +- +- load_secondary_context(mm); +- __flush_tlb_mm(CTX_HWBITS(mm->context), +- SECONDARY_CONTEXT); +-} +- +-void smp_new_mmu_context_version(void) +-{ +- smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0); +-} +- + #ifdef CONFIG_KGDB + void kgdb_roundup_cpus(unsigned long flags) + { +--- a/arch/sparc/kernel/ttable_64.S ++++ b/arch/sparc/kernel/ttable_64.S +@@ -50,7 +50,7 @@ tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTR + tl0_irq1: TRAP_IRQ(smp_call_function_client, 1) + tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2) + tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3) +-tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4) ++tl0_irq4: BTRAP(0x44) + #else + tl0_irq1: BTRAP(0x41) + tl0_irq2: BTRAP(0x42) +--- a/arch/sparc/mm/ultra.S ++++ b/arch/sparc/mm/ultra.S +@@ -971,11 +971,6 @@ xcall_capture: + wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint + retry + +- .globl xcall_new_mmu_context_version +-xcall_new_mmu_context_version: +- wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint +- retry +- + #ifdef CONFIG_KGDB + .globl xcall_kgdb_capture + xcall_kgdb_capture: diff --git a/queue-4.11/sparc64-mm-fix-copy_tsb-to-correctly-copy-huge-page-tsbs.patch b/queue-4.11/sparc64-mm-fix-copy_tsb-to-correctly-copy-huge-page-tsbs.patch new file mode 100644 index 00000000000..ca926557f21 --- /dev/null +++ b/queue-4.11/sparc64-mm-fix-copy_tsb-to-correctly-copy-huge-page-tsbs.patch @@ -0,0 +1,99 @@ +From foo@baz Thu Jun 8 08:58:43 CEST 2017 +From: Mike Kravetz +Date: Fri, 2 Jun 2017 14:51:12 -0700 +Subject: sparc64: mm: fix copy_tsb to correctly copy huge page TSBs + +From: Mike Kravetz + + +[ Upstream commit 654f4807624a657f364417c2a7454f0df9961734 ] + +When a TSB grows beyond its current capacity, a new TSB is allocated +and copy_tsb is called to copy entries from the old TSB to the new. +A hash shift based on page size is used to calculate the index of an +entry in the TSB. copy_tsb has hard coded PAGE_SHIFT in these +calculations. However, for huge page TSBs the value REAL_HPAGE_SHIFT +should be used. As a result, when copy_tsb is called for a huge page +TSB the entries are placed at the incorrect index in the newly +allocated TSB. When doing hardware table walk, the MMU does not +match these entries and we end up in the TSB miss handling code. +This code will then create and write an entry to the correct index +in the TSB. We take a performance hit for the table walk miss and +recreation of these entries. + +Pass a new parameter to copy_tsb that is the page size shift to be +used when copying the TSB. + +Suggested-by: Anthony Yznaga +Signed-off-by: Mike Kravetz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/tsb.S | 11 +++++++---- + arch/sparc/mm/tsb.c | 7 +++++-- + 2 files changed, 12 insertions(+), 6 deletions(-) + +--- a/arch/sparc/kernel/tsb.S ++++ b/arch/sparc/kernel/tsb.S +@@ -455,13 +455,16 @@ __tsb_context_switch: + .type copy_tsb,#function + copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size + * %o2=new_tsb_base, %o3=new_tsb_size ++ * %o4=page_size_shift + */ + sethi %uhi(TSB_PASS_BITS), %g7 + srlx %o3, 4, %o3 +- add %o0, %o1, %g1 /* end of old tsb */ ++ add %o0, %o1, %o1 /* end of old tsb */ + sllx %g7, 32, %g7 + sub %o3, 1, %o3 /* %o3 == new tsb hash mask */ + ++ mov %o4, %g1 /* page_size_shift */ ++ + 661: prefetcha [%o0] ASI_N, #one_read + .section .tsb_phys_patch, "ax" + .word 661b +@@ -486,9 +489,9 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_ + /* This can definitely be computed faster... */ + srlx %o0, 4, %o5 /* Build index */ + and %o5, 511, %o5 /* Mask index */ +- sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */ ++ sllx %o5, %g1, %o5 /* Put into vaddr position */ + or %o4, %o5, %o4 /* Full VADDR. */ +- srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */ ++ srlx %o4, %g1, %o4 /* Shift down to create index */ + and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */ + sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */ + TSB_STORE(%o2 + %o4, %g2) /* Store TAG */ +@@ -496,7 +499,7 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_ + TSB_STORE(%o2 + %o4, %g3) /* Store TTE */ + + 80: add %o0, 16, %o0 +- cmp %o0, %g1 ++ cmp %o0, %o1 + bne,pt %xcc, 90b + nop + +--- a/arch/sparc/mm/tsb.c ++++ b/arch/sparc/mm/tsb.c +@@ -496,7 +496,8 @@ retry_tsb_alloc: + extern void copy_tsb(unsigned long old_tsb_base, + unsigned long old_tsb_size, + unsigned long new_tsb_base, +- unsigned long new_tsb_size); ++ unsigned long new_tsb_size, ++ unsigned long page_size_shift); + unsigned long old_tsb_base = (unsigned long) old_tsb; + unsigned long new_tsb_base = (unsigned long) new_tsb; + +@@ -504,7 +505,9 @@ retry_tsb_alloc: + old_tsb_base = __pa(old_tsb_base); + new_tsb_base = __pa(new_tsb_base); + } +- copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size); ++ copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size, ++ tsb_index == MM_TSB_BASE ? ++ PAGE_SHIFT : REAL_HPAGE_SHIFT); + } + + mm->context.tsb_block[tsb_index].tsb = new_tsb; diff --git a/queue-4.11/sparc64-new-context-wrap.patch b/queue-4.11/sparc64-new-context-wrap.patch new file mode 100644 index 00000000000..299ede19a5b --- /dev/null +++ b/queue-4.11/sparc64-new-context-wrap.patch @@ -0,0 +1,178 @@ +From foo@baz Thu Jun 8 08:58:43 CEST 2017 +From: Pavel Tatashin +Date: Wed, 31 May 2017 11:25:24 -0400 +Subject: sparc64: new context wrap + +From: Pavel Tatashin + + +[ Upstream commit a0582f26ec9dfd5360ea2f35dd9a1b026f8adda0 ] + +The current wrap implementation has a race issue: it is called outside of +the ctx_alloc_lock, and also does not wait for all CPUs to complete the +wrap. This means that a thread can get a new context with a new version +and another thread might still be running with the same context. The +problem is especially severe on CPUs with shared TLBs, like sun4v. I used +the following test to very quickly reproduce the problem: +- start over 8K processes (must be more than context IDs) +- write and read values at a memory location in every process. + +Very quickly memory corruptions start happening, and what we read back +does not equal what we wrote. 
diff --git a/queue-4.11/sparc64-new-context-wrap.patch b/queue-4.11/sparc64-new-context-wrap.patch
new file mode 100644
index 00000000000..299ede19a5b
--- /dev/null
+++ b/queue-4.11/sparc64-new-context-wrap.patch
@@ -0,0 +1,178 @@
+From foo@baz Thu Jun 8 08:58:43 CEST 2017
+From: Pavel Tatashin
+Date: Wed, 31 May 2017 11:25:24 -0400
+Subject: sparc64: new context wrap
+
+From: Pavel Tatashin
+
+
+[ Upstream commit a0582f26ec9dfd5360ea2f35dd9a1b026f8adda0 ]
+
+The current wrap implementation has a race issue: it is called outside of
+the ctx_alloc_lock, and also does not wait for all CPUs to complete the
+wrap. This means that a thread can get a new context with a new version
+and another thread might still be running with the same context. The
+problem is especially severe on CPUs with shared TLBs, like sun4v. I used
+the following test to very quickly reproduce the problem:
+- start over 8K processes (must be more than context IDs)
+- write and read values at a memory location in every process.
+
+Very quickly memory corruptions start happening, and what we read back
+does not equal what we wrote.
+
+Several approaches were explored before settling on this one:
+
+Approach 1:
+Move smp_new_mmu_context_version() inside ctx_alloc_lock, and wait for
+every process to complete the wrap. (Note: every CPU must WAIT before
+leaving smp_new_mmu_context_version_client() until every one arrives).
+
+This approach ends up with deadlocks, as some threads own locks which other
+threads are waiting for, and they never receive softint until these threads
+exit smp_new_mmu_context_version_client(). Since we do not allow the exit,
+deadlock happens.
+
+Approach 2:
+Handle wrap right during mondo interrupt. Use etrap/rtrap to enter into
+C code, and issue new versions to every CPU.
+This approach adds some overhead to runtime: in switch_mm() we must add
+some checks to make sure that versions have not changed due to wrap while
+we were loading the new secondary context. (This could be protected by
+PSTATE_IE, but that degrades performance on M7 and older CPUs, as it takes
+50 cycles for each access.) Also, we still need a global per-cpu array of
+MMs to know where we need to load new contexts, otherwise we can change
+context to a thread that is going away (if we received a mondo between
+switch_mm() and switch_to() time). Finally, there are some issues with
+window registers in rtrap() when context IDs are changed during CPU mondo
+time.
+
+The approach in this patch is the simplest and has almost no impact on
+runtime. We use the per-cpu array of mm's where the last secondary
+contexts were loaded onto CPUs and bump their versions to the new
+generation without changing context IDs. If a new process comes in to get
+a context ID, it will go through get_new_mmu_context() because of version
+mismatch. But the running processes do not need to be interrupted. And
+the wrap is quicker, as we do not need to xcall and wait for everyone to
+receive and complete the wrap.
+
+Signed-off-by: Pavel Tatashin
+Reviewed-by: Bob Picco
+Reviewed-by: Steven Sistare
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/mm/init_64.c | 81 ++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 54 insertions(+), 27 deletions(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -713,6 +713,53 @@ unsigned long tlb_context_cache = CTX_FI
+ DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
+ DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
+
++static void mmu_context_wrap(void)
++{
++ unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
++ unsigned long new_ver, new_ctx, old_ctx;
++ struct mm_struct *mm;
++ int cpu;
++
++ bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);
++
++ /* Reserve kernel context */
++ set_bit(0, mmu_context_bmap);
++
++ new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
++ if (unlikely(new_ver == 0))
++ new_ver = CTX_FIRST_VERSION;
++ tlb_context_cache = new_ver;
++
++ /*
++ * Make sure that any new mm that are added into per_cpu_secondary_mm,
++ * are going to go through get_new_mmu_context() path.
++ */
++ mb();
++
++ /*
++ * Updated versions to current on those CPUs that had valid secondary
++ * contexts
++ */
++ for_each_online_cpu(cpu) {
++ /*
++ * If a new mm is stored after we took this mm from the array,
++ * it will go into get_new_mmu_context() path, because we
++ * already bumped the version in tlb_context_cache.
++ */
++ mm = per_cpu(per_cpu_secondary_mm, cpu);
++
++ if (unlikely(!mm || mm == &init_mm))
++ continue;
++
++ old_ctx = mm->context.sparc64_ctx_val;
++ if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
++ new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
++ set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
++ mm->context.sparc64_ctx_val = new_ctx;
++ }
++ }
++}
++
+ /* Caller does TLB context flushing on local CPU if necessary.
+ * The caller also ensures that CTX_VALID(mm->context) is false.
+ *
+@@ -727,50 +774,30 @@ void get_new_mmu_context(struct mm_struc
+ {
+ unsigned long ctx, new_ctx;
+ unsigned long orig_pgsz_bits;
+- int new_version;
+
+ spin_lock(&ctx_alloc_lock);
++retry:
++ /* wrap might have happened, test again if our context became valid */
++ if (unlikely(CTX_VALID(mm->context)))
++ goto out;
+ orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
+ ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
+ new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
+- new_version = 0;
+ if (new_ctx >= (1 << CTX_NR_BITS)) {
+ new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
+ if (new_ctx >= ctx) {
+- int i;
+- new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
+- CTX_FIRST_VERSION + 1;
+- if (new_ctx == 1)
+- new_ctx = CTX_FIRST_VERSION + 1;
+-
+- /* Don't call memset, for 16 entries that's just
+- * plain silly...
+- */
+- mmu_context_bmap[0] = 3;
+- mmu_context_bmap[1] = 0;
+- mmu_context_bmap[2] = 0;
+- mmu_context_bmap[3] = 0;
+- for (i = 4; i < CTX_BMAP_SLOTS; i += 4) {
+- mmu_context_bmap[i + 0] = 0;
+- mmu_context_bmap[i + 1] = 0;
+- mmu_context_bmap[i + 2] = 0;
+- mmu_context_bmap[i + 3] = 0;
+- }
+- new_version = 1;
+- goto out;
++ mmu_context_wrap();
++ goto retry;
+ }
+ }
+ if (mm->context.sparc64_ctx_val)
+ cpumask_clear(mm_cpumask(mm));
+ mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
+ new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
+-out:
+ tlb_context_cache = new_ctx;
+ mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
++out:
+ spin_unlock(&ctx_alloc_lock);
+ }
+
+ static int numa_enabled = 1;
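[ Editor's note: the reproducer described in the message above, reconstructed as a hypothetical sketch; it is not the author's test program, and NPROC and the corruption check are assumptions. Run it only on a disposable machine: it deliberately forks more spinning processes than sparc64 has context IDs. ]

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define NPROC 8200	/* assumed: just above the 8K context IDs */

int main(void)
{
	for (int i = 0; i < NPROC; i++) {
		pid_t pid = fork();

		if (pid < 0) {
			perror("fork");
			exit(1);
		}
		if (pid == 0) {
			/* Child: keep writing and re-reading a private
			 * value; with the wrap race, another context's
			 * mapping can leak in and the compare fails. */
			volatile long val;

			for (;;) {
				val = i;
				if (val != i) {
					fprintf(stderr,
						"corruption in pid %d\n",
						getpid());
					_exit(1);
				}
			}
		}
	}
	pause();	/* parent: keep the children alive */
	return 0;
}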
diff --git a/queue-4.11/sparc64-redefine-first-version.patch b/queue-4.11/sparc64-redefine-first-version.patch
new file mode 100644
index 00000000000..7a373dcc158
--- /dev/null
+++ b/queue-4.11/sparc64-redefine-first-version.patch
@@ -0,0 +1,58 @@
+From foo@baz Thu Jun 8 08:58:43 CEST 2017
+From: Pavel Tatashin
+Date: Wed, 31 May 2017 11:25:22 -0400
+Subject: sparc64: redefine first version
+
+From: Pavel Tatashin
+
+
+[ Upstream commit c4415235b2be0cc791572e8e7f7466ab8f73a2bf ]
+
+CTX_FIRST_VERSION defines the first context version, but it also defines
+the first context. This patch redefines it to only include the first
+context version.
+
+Signed-off-by: Pavel Tatashin
+Reviewed-by: Bob Picco
+Reviewed-by: Steven Sistare
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/include/asm/mmu_64.h | 2 +-
+ arch/sparc/mm/init_64.c | 6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/sparc/include/asm/mmu_64.h
++++ b/arch/sparc/include/asm/mmu_64.h
+@@ -52,7 +52,7 @@
+ #define CTX_NR_MASK TAG_CONTEXT_BITS
+ #define CTX_HW_MASK (CTX_NR_MASK | CTX_PGSZ_MASK)
+
+-#define CTX_FIRST_VERSION ((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL))
++#define CTX_FIRST_VERSION BIT(CTX_VERSION_SHIFT)
+ #define CTX_VALID(__ctx) \
+ (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
+ #define CTX_HWBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -707,7 +707,7 @@ EXPORT_SYMBOL(__flush_dcache_range);
+
+ /* get_new_mmu_context() uses "cache + 1". */
+ DEFINE_SPINLOCK(ctx_alloc_lock);
+-unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
++unsigned long tlb_context_cache = CTX_FIRST_VERSION;
+ #define MAX_CTX_NR (1UL << CTX_NR_BITS)
+ #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR)
+ DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
+@@ -738,9 +738,9 @@ void get_new_mmu_context(struct mm_struc
+ if (new_ctx >= ctx) {
+ int i;
+ new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
+- CTX_FIRST_VERSION;
++ CTX_FIRST_VERSION + 1;
+ if (new_ctx == 1)
+- new_ctx = CTX_FIRST_VERSION;
++ new_ctx = CTX_FIRST_VERSION + 1;
+
+ /* Don't call memset, for 16 entries that's just
+ * plain silly...
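[ Editor's note: a small user-space model of the context word this patch changes, showing that CTX_FIRST_VERSION is now purely the version field while the first context ID still comes from the "cache + 1" logic quoted above. The bit widths and mask layout are simplified stand-ins, not the real sparc64 definitions. ]

#include <stdio.h>

#define CTX_NR_BITS       13			/* assumed width */
#define CTX_NR_MASK       ((1UL << CTX_NR_BITS) - 1)
#define CTX_VERSION_SHIFT CTX_NR_BITS		/* simplified layout */
#define CTX_VERSION_MASK  (~CTX_NR_MASK)
#define CTX_FIRST_VERSION (1UL << CTX_VERSION_SHIFT)

int main(void)
{
	/* After the patch the cache starts at the bare first version... */
	unsigned long cache = CTX_FIRST_VERSION;
	/* ...and "cache + 1" still hands out context ID 1 first, keeping
	 * ID 0 reserved for the kernel context. */
	unsigned long first_id = (cache + 1) & CTX_NR_MASK;

	printf("version=%#lx first context id=%lu\n",
	       cache & CTX_VERSION_MASK, first_id);
	return 0;
}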
diff --git a/queue-4.11/sparc64-reset-mm-cpumask-after-wrap.patch b/queue-4.11/sparc64-reset-mm-cpumask-after-wrap.patch
new file mode 100644
index 00000000000..3729c270328
--- /dev/null
+++ b/queue-4.11/sparc64-reset-mm-cpumask-after-wrap.patch
@@ -0,0 +1,42 @@
+From foo@baz Thu Jun 8 08:58:43 CEST 2017
+From: Pavel Tatashin
+Date: Wed, 31 May 2017 11:25:20 -0400
+Subject: sparc64: reset mm cpumask after wrap
+
+From: Pavel Tatashin
+
+
+[ Upstream commit 588974857359861891f478a070b1dc7ae04a3880 ]
+
+After a wrap (getting a new context version) a process must get a new
+context id, which means that we would need to flush the context id from
+the TLB before running for the first time with this ID on every CPU. But
+we use mm_cpumask to determine if this process has been running on this CPU
+before, and this mask is not reset after a wrap. So, there are two possible
+fixes for this issue:
+
+1. Clear mm cpumask whenever mm gets a new context id
+2. Unconditionally flush context every time a process is running on a CPU
+
+This patch implements the first solution.
+
+Signed-off-by: Pavel Tatashin
+Reviewed-by: Bob Picco
+Reviewed-by: Steven Sistare
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/mm/init_64.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -759,6 +759,8 @@ void get_new_mmu_context(struct mm_struc
+ goto out;
+ }
+ }
++ if (mm->context.sparc64_ctx_val)
++ cpumask_clear(mm_cpumask(mm));
+ mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
+ new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
+ out:
diff --git a/queue-4.11/tcp-disallow-cwnd-undo-when-switching-congestion-control.patch b/queue-4.11/tcp-disallow-cwnd-undo-when-switching-congestion-control.patch
new file mode 100644
index 00000000000..a81c4f77e07
--- /dev/null
+++ b/queue-4.11/tcp-disallow-cwnd-undo-when-switching-congestion-control.patch
@@ -0,0 +1,44 @@
+From foo@baz Thu Jun 8 08:58:08 CEST 2017
+From: Yuchung Cheng
+Date: Wed, 31 May 2017 11:21:27 -0700
+Subject: tcp: disallow cwnd undo when switching congestion control
+
+From: Yuchung Cheng
+
+
+[ Upstream commit 44abafc4cc094214a99f860f778c48ecb23422fc ]
+
+When the sender switches its congestion control during loss
+recovery, if the recovery is spurious then it may incorrectly
+revert cwnd and ssthresh to the older values set by a previous
+congestion control. Consider a congestion control (like BBR)
+that does not use ssthresh and keeps it infinite: the connection
+may incorrectly revert cwnd to an infinite value when switching
+from BBR to another congestion control.
+
+This patch fixes it by disallowing such a cwnd undo operation
+upon switching congestion control. Note that undo_marker
+is not reset, so that the packets that were incorrectly marked
+lost can still be corrected. We only avoid undoing the cwnd in
+tcp_undo_cwnd_reduction().
+
+Signed-off-by: Yuchung Cheng
+Signed-off-by: Soheil Hassas Yeganeh
+Signed-off-by: Neal Cardwell
+Signed-off-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_cong.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp_cong.c
++++ b/net/ipv4/tcp_cong.c
+@@ -180,6 +180,7 @@ void tcp_init_congestion_control(struct
+ {
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
++ tcp_sk(sk)->prior_ssthresh = 0;
+ if (icsk->icsk_ca_ops->init)
+ icsk->icsk_ca_ops->init(sk);
+ if (tcp_ca_needs_ecn(sk))
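[ Editor's note: a user-space model of why zeroing prior_ssthresh on congestion-control init is enough to block the undo. The struct and the undo helper loosely mirror tcp_undo_cwnd_reduction(); the field values are made up. ]

#include <stdio.h>

struct sock_model {
	unsigned int snd_cwnd;
	unsigned int snd_ssthresh;
	unsigned int prior_cwnd;	/* stashed when recovery starts */
	unsigned int prior_ssthresh;	/* 0 means "undo not allowed" */
};

/* Loosely models tcp_undo_cwnd_reduction(): cwnd and ssthresh are only
 * reverted when a prior ssthresh was stashed. */
static void undo_cwnd_reduction(struct sock_model *sk)
{
	if (sk->prior_ssthresh) {
		sk->snd_cwnd = sk->prior_cwnd;
		if (sk->prior_ssthresh > sk->snd_ssthresh)
			sk->snd_ssthresh = sk->prior_ssthresh;
	}
}

int main(void)
{
	/* A BBR-style CC kept ssthresh effectively infinite, then the
	 * connection switched congestion control mid-recovery. */
	struct sock_model sk = {
		.snd_cwnd = 10, .snd_ssthresh = 20,
		.prior_cwnd = 100, .prior_ssthresh = 0x7fffffffu,
	};

	sk.prior_ssthresh = 0;	/* the fix: reset on CC initialization */
	undo_cwnd_reduction(&sk);
	printf("cwnd=%u ssthresh=%u\n", sk.snd_cwnd, sk.snd_ssthresh);
	return 0;
}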
diff --git a/queue-4.11/vxlan-eliminate-cached-dst-leak.patch b/queue-4.11/vxlan-eliminate-cached-dst-leak.patch
new file mode 100644
index 00000000000..dd24d18355d
--- /dev/null
+++ b/queue-4.11/vxlan-eliminate-cached-dst-leak.patch
@@ -0,0 +1,64 @@
+From foo@baz Thu Jun 8 08:58:08 CEST 2017
+From: Lance Richardson
+Date: Mon, 29 May 2017 13:25:57 -0400
+Subject: vxlan: eliminate cached dst leak
+
+From: Lance Richardson
+
+
+[ Upstream commit 35cf2845563c1aaa01d27bd34d64795c4ae72700 ]
+
+After commit 0c1d70af924b ("net: use dst_cache for vxlan device"),
+cached dst entries could be leaked when more than one remote was
+present for a given vxlan_fdb entry, causing subsequent netns
+operations to block indefinitely and "unregister_netdevice: waiting
+for lo to become free." messages to appear in the kernel log.
+
+Fix by properly releasing cached dst and freeing resources in this
+case.
+
+Fixes: 0c1d70af924b ("net: use dst_cache for vxlan device")
+Signed-off-by: Lance Richardson
+Acked-by: Paolo Abeni
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/vxlan.c | 20 +++++++++++++++++---
+ 1 file changed, 17 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -740,6 +740,22 @@ static void vxlan_fdb_destroy(struct vxl
+ call_rcu(&f->rcu, vxlan_fdb_free);
+ }
+
++static void vxlan_dst_free(struct rcu_head *head)
++{
++ struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
++
++ dst_cache_destroy(&rd->dst_cache);
++ kfree(rd);
++}
++
++static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
++ struct vxlan_rdst *rd)
++{
++ list_del_rcu(&rd->list);
++ vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
++ call_rcu(&rd->rcu, vxlan_dst_free);
++}
++
+ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
+ union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
+ __be32 *vni, u32 *ifindex)
+@@ -864,9 +880,7 @@ static int __vxlan_fdb_delete(struct vxl
+ * otherwise destroy the fdb entry
+ */
+ if (rd && !list_is_singular(&f->remotes)) {
+- list_del_rcu(&rd->list);
+- vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
+- kfree_rcu(rd, rcu);
++ vxlan_fdb_dst_destroy(vxlan, f, rd);
+ goto out;
+ }
+
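[ Editor's note: a user-space model of the leak shape fixed above. kfree_rcu() can only free the outer vxlan_rdst; its embedded dst_cache pins dst refcounts that must be dropped first, which is what the new vxlan_dst_free() RCU callback does. All names below are simplified stand-ins for the kernel structures. ]

#include <stdio.h>
#include <stdlib.h>

static int dst_refs;			/* models outstanding dst refcounts */

struct dst_cache_model { int holds_ref; };

struct rdst_model {
	struct dst_cache_model cache;	/* embedded, like vxlan_rdst */
};

static void dst_cache_destroy_model(struct dst_cache_model *c)
{
	if (c->holds_ref) {
		dst_refs--;		/* release the cached dst */
		c->holds_ref = 0;
	}
}

/* What vxlan_dst_free() does after the RCU grace period: drop the
 * nested reference, then free the outer object. */
static void rdst_free(struct rdst_model *rd)
{
	dst_cache_destroy_model(&rd->cache);
	free(rd);
}

int main(void)
{
	struct rdst_model *rd = calloc(1, sizeof(*rd));

	if (!rd)
		return 1;
	rd->cache.holds_ref = 1;
	dst_refs++;			/* a dst was cached on the tx path */

	/* A bare free(rd) here would leave dst_refs stuck at 1, the
	 * user-visible "waiting for lo to become free" symptom. */
	rdst_free(rd);
	printf("leaked refs: %d\n", dst_refs);
	return 0;
}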
diff --git a/queue-4.11/vxlan-fix-use-after-free-on-deletion.patch b/queue-4.11/vxlan-fix-use-after-free-on-deletion.patch
new file mode 100644
index 00000000000..d1ca5a4e349
--- /dev/null
+++ b/queue-4.11/vxlan-fix-use-after-free-on-deletion.patch
@@ -0,0 +1,81 @@
+From foo@baz Thu Jun 8 08:58:08 CEST 2017
+From: Mark Bloch
+Date: Fri, 2 Jun 2017 03:24:08 +0300
+Subject: vxlan: fix use-after-free on deletion
+
+From: Mark Bloch
+
+
+[ Upstream commit a53cb29b0af346af44e4abf13d7e59f807fba690 ]
+
+Adding a vxlan interface to a socket isn't symmetrical: while adding
+is done in vxlan_open(), the deletion is done in vxlan_dellink().
+This can cause a use-after-free error when we close the vxlan
+interface before deleting it.
+
+We add vxlan_vs_del_dev() to match vxlan_vs_add_dev() and call
+it from vxlan_stop() to match the call from vxlan_open().
+
+Fixes: 56ef9c909b40 ("vxlan: Move socket initialization to within rtnl scope")
+Acked-by: Jiri Benc
+Tested-by: Roi Dayan
+Signed-off-by: Mark Bloch
+Acked-by: Roopa Prabhu
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/vxlan.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -59,6 +59,8 @@ static const u8 all_zeros_mac[ETH_ALEN +
+
+ static int vxlan_sock_add(struct vxlan_dev *vxlan);
+
++static void vxlan_vs_del_dev(struct vxlan_dev *vxlan);
++
+ /* per-network namespace private data for this module */
+ struct vxlan_net {
+ struct list_head vxlan_list;
+@@ -1081,6 +1083,8 @@ static void vxlan_sock_release(struct vx
+ rcu_assign_pointer(vxlan->vn4_sock, NULL);
+ synchronize_net();
+
++ vxlan_vs_del_dev(vxlan);
++
+ if (__vxlan_sock_release_prep(sock4)) {
+ udp_tunnel_sock_release(sock4->sock);
+ kfree(sock4);
+@@ -2352,6 +2356,15 @@ static void vxlan_cleanup(unsigned long
+ mod_timer(&vxlan->age_timer, next_timer);
+ }
+
++static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
++{
++ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
++
++ spin_lock(&vn->sock_lock);
++ hlist_del_init_rcu(&vxlan->hlist);
++ spin_unlock(&vn->sock_lock);
++}
++
+ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
+ {
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+@@ -3289,15 +3302,9 @@ static int vxlan_changelink(struct net_d
+ static void vxlan_dellink(struct net_device *dev, struct list_head *head)
+ {
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+
+ vxlan_flush(vxlan, true);
+
+- spin_lock(&vn->sock_lock);
+- if (!hlist_unhashed(&vxlan->hlist))
+- hlist_del_rcu(&vxlan->hlist);
+- spin_unlock(&vn->sock_lock);
+-
+ gro_cells_destroy(&vxlan->gro_cells);
+ list_del(&vxlan->next);
+ unregister_netdevice_queue(dev, head);
diff --git a/queue-4.9/series b/queue-4.9/series
new file mode 100644
index 00000000000..c2dd8aa42a3
--- /dev/null
+++ b/queue-4.9/series
@@ -0,0 +1,22 @@
+bnx2x-fix-multi-cos.patch
+vxlan-eliminate-cached-dst-leak.patch
+ipv6-xfrm-handle-errors-reported-by-xfrm6_find_1stfragopt.patch
+cxgb4-avoid-enabling-napi-twice-to-the-same-queue.patch
+tcp-disallow-cwnd-undo-when-switching-congestion-control.patch
+vxlan-fix-use-after-free-on-deletion.patch
+ipv6-fix-leak-in-ipv6_gso_segment.patch
+net-ping-do-not-abuse-udp_poll.patch
+net-ipv6-fix-calipso-causing-gpf-with-datagram-support.patch
+net-ethoc-enable-napi-before-poll-may-be-scheduled.patch
+net-stmmac-fix-completely-hung-tx-when-using-tso.patch
+net-bridge-start-hello-timer-only-if-device-is-up.patch
+sparc64-add-__multi3-for-gcc-7.x-and-later.patch
+sparc64-mm-fix-copy_tsb-to-correctly-copy-huge-page-tsbs.patch
+sparc-machine-description-indices-can-vary.patch
+sparc64-reset-mm-cpumask-after-wrap.patch
+sparc64-combine-activate_mm-and-switch_mm.patch
+sparc64-redefine-first-version.patch
+sparc64-add-per-cpu-mm-of-secondary-contexts.patch
+sparc64-new-context-wrap.patch
+sparc64-delete-old-wrap-code.patch
+arch-sparc-support-nr_cpus-4096.patch