From: Greg Kroah-Hartman Date: Fri, 15 Sep 2017 17:08:20 +0000 (-0700) Subject: 4.4-stable patches X-Git-Tag: v4.9.51~26 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4b606b9b450252deefdfbe955b638507bbf7dc29;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: gianfar-fix-tx-flow-control-deactivation.patch ip6_gre-update-mtu-properly-in-ip6gre_err.patch ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch ipv6-fix-sparse-warning-on-rt6i_node.patch ipv6-fix-typo-in-fib6_net_exit.patch qlge-avoid-memcpy-buffer-overflow.patch revert-net-fix-percpu-memory-leaks.patch revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch tcp-initialize-rcv_mss-to-tcp_min_mss-instead-of-0.patch --- diff --git a/queue-4.4/gianfar-fix-tx-flow-control-deactivation.patch b/queue-4.4/gianfar-fix-tx-flow-control-deactivation.patch new file mode 100644 index 00000000000..0e19772ac7e --- /dev/null +++ b/queue-4.4/gianfar-fix-tx-flow-control-deactivation.patch @@ -0,0 +1,39 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Claudiu Manoil +Date: Mon, 4 Sep 2017 10:45:28 +0300 +Subject: gianfar: Fix Tx flow control deactivation + +From: Claudiu Manoil + + +[ Upstream commit 5d621672bc1a1e5090c1ac5432a18c79e0e13e03 ] + +The wrong register is checked for the Tx flow control bit, +it should have been maccfg1 not maccfg2. +This went unnoticed for so long probably because the impact is +hardly visible, not to mention the tangled code from adjust_link(). +First, link flow control (i.e. handling of Rx/Tx link level pause frames) +is disabled by default (needs to be enabled via 'ethtool -A'). 
+Secondly, maccfg2 always returns 0 for tx_flow_oldval (except for a few +old boards), which results in Tx flow control remaining always on +once activated. + +Fixes: 45b679c9a3ccd9e34f28e6ec677b812a860eb8eb ("gianfar: Implement PAUSE frame generation support") +Signed-off-by: Claudiu Manoil +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/gianfar.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/freescale/gianfar.c ++++ b/drivers/net/ethernet/freescale/gianfar.c +@@ -3676,7 +3676,7 @@ static noinline void gfar_update_link_st + u32 tempval1 = gfar_read(&regs->maccfg1); + u32 tempval = gfar_read(&regs->maccfg2); + u32 ecntrl = gfar_read(&regs->ecntrl); +- u32 tx_flow_oldval = (tempval & MACCFG1_TX_FLOW); ++ u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW); + + if (phydev->duplex != priv->oldduplex) { + if (!(phydev->duplex)) diff --git a/queue-4.4/ip6_gre-update-mtu-properly-in-ip6gre_err.patch b/queue-4.4/ip6_gre-update-mtu-properly-in-ip6gre_err.patch new file mode 100644 index 00000000000..881f92823b6 --- /dev/null +++ b/queue-4.4/ip6_gre-update-mtu-properly-in-ip6gre_err.patch @@ -0,0 +1,46 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Xin Long +Date: Tue, 5 Sep 2017 17:26:33 +0800 +Subject: ip6_gre: update mtu properly in ip6gre_err + +From: Xin Long + + +[ Upstream commit 5c25f30c93fdc5bf25e62101aeaae7a4f9b421b3 ] + +Now when processing ICMPV6_PKT_TOOBIG, ip6gre_err only subtracts the +offset of gre header from mtu info. The expected mtu of gre device +should also subtract gre header. Otherwise, the next packets still +can't be sent out. + +Jianlin found this issue when using the topo: + client(ip6gre)<---->(nic1)route(nic2)<----->(ip6gre)server + +and reducing nic2's mtu, then both tcp and sctp's performance with +big size data became 0. + +This patch is to fix it by also subtracting grehdr (tun->tun_hlen) +from mtu info when updating gre device's mtu in ip6gre_err(). 
It +also needs to subtract ETH_HLEN if gre dev'type is ARPHRD_ETHER. + +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -433,7 +433,9 @@ static void ip6gre_err(struct sk_buff *s + } + break; + case ICMPV6_PKT_TOOBIG: +- mtu = be32_to_cpu(info) - offset; ++ mtu = be32_to_cpu(info) - offset - t->tun_hlen; ++ if (t->dev->type == ARPHRD_ETHER) ++ mtu -= ETH_HLEN; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + t->dev->mtu = mtu; diff --git a/queue-4.4/ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch b/queue-4.4/ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch new file mode 100644 index 00000000000..d9053fe77bb --- /dev/null +++ b/queue-4.4/ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch @@ -0,0 +1,50 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Stefano Brivio +Date: Fri, 18 Aug 2017 14:40:53 +0200 +Subject: ipv6: accept 64k - 1 packet length in ip6_find_1stfragopt() + +From: Stefano Brivio + + +[ Upstream commit 3de33e1ba0506723ab25734e098cf280ecc34756 ] + +A packet length of exactly IPV6_MAXPLEN is allowed, we should +refuse parsing options only if the size is 64KiB or more. + +While at it, remove one extra variable and one assignment which +were also introduced by the commit that introduced the size +check. Checking the sum 'offset + len' and only later adding +'len' to 'offset' doesn't provide any advantage over directly +summing to 'offset' and checking it. + +Fixes: 6399f1fae4ec ("ipv6: avoid overflow of offset in ip6_find_1stfragopt") +Signed-off-by: Stefano Brivio +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/output_core.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/net/ipv6/output_core.c ++++ b/net/ipv6/output_core.c +@@ -86,7 +86,6 @@ int ip6_find_1stfragopt(struct sk_buff * + + while (offset <= packet_len) { + struct ipv6_opt_hdr *exthdr; +- unsigned int len; + + switch (**nexthdr) { + +@@ -112,10 +111,9 @@ int ip6_find_1stfragopt(struct sk_buff * + + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + + offset); +- len = ipv6_optlen(exthdr); +- if (len + offset >= IPV6_MAXPLEN) ++ offset += ipv6_optlen(exthdr); ++ if (offset > IPV6_MAXPLEN) + return -EINVAL; +- offset += len; + *nexthdr = &exthdr->nexthdr; + } + diff --git a/queue-4.4/ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch b/queue-4.4/ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch new file mode 100644 index 00000000000..abb4538ca2f --- /dev/null +++ b/queue-4.4/ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch @@ -0,0 +1,163 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Wei Wang +Date: Mon, 21 Aug 2017 09:47:10 -0700 +Subject: ipv6: add rcu grace period before freeing fib6_node + +From: Wei Wang + + +[ Upstream commit c5cff8561d2d0006e972bd114afd51f082fee77c ] + +We currently keep rt->rt6i_node pointing to the fib6_node for the route. +And some functions make use of this pointer to dereference the fib6_node +from rt structure, e.g. rt6_check(). However, as there is neither +refcount nor rcu taken when dereferencing rt->rt6i_node, it could +potentially cause crashes as rt->rt6i_node could be set to NULL by other +CPUs when doing a route deletion. +This patch introduces an rcu grace period before freeing fib6_node and +makes sure the functions that dereference it takes rcu_read_lock(). + +Note: there is no "Fixes" tag because this bug was there in a very +early stage. + +Signed-off-by: Wei Wang +Acked-by: Eric Dumazet +Acked-by: Martin KaFai Lau +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_fib.h | 30 +++++++++++++++++++++++++++++- + net/ipv6/ip6_fib.c | 20 ++++++++++++++++---- + net/ipv6/route.c | 14 +++++++++++--- + 3 files changed, 56 insertions(+), 8 deletions(-) + +--- a/include/net/ip6_fib.h ++++ b/include/net/ip6_fib.h +@@ -68,6 +68,7 @@ struct fib6_node { + __u16 fn_flags; + int fn_sernum; + struct rt6_info *rr_ptr; ++ struct rcu_head rcu; + }; + + #ifndef CONFIG_IPV6_SUBTREES +@@ -165,13 +166,40 @@ static inline void rt6_update_expires(st + rt0->rt6i_flags |= RTF_EXPIRES; + } + ++/* Function to safely get fn->sernum for passed in rt ++ * and store result in passed in cookie. ++ * Return true if we can get cookie safely ++ * Return false if not ++ */ ++static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, ++ u32 *cookie) ++{ ++ struct fib6_node *fn; ++ bool status = false; ++ ++ rcu_read_lock(); ++ fn = rcu_dereference(rt->rt6i_node); ++ ++ if (fn) { ++ *cookie = fn->fn_sernum; ++ status = true; ++ } ++ ++ rcu_read_unlock(); ++ return status; ++} ++ + static inline u32 rt6_get_cookie(const struct rt6_info *rt) + { ++ u32 cookie = 0; ++ + if (rt->rt6i_flags & RTF_PCPU || + (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) + rt = (struct rt6_info *)(rt->dst.from); + +- return rt->rt6i_node ? 
rt->rt6i_node->fn_sernum : 0; ++ rt6_get_cookie_safe(rt, &cookie); ++ ++ return cookie; + } + + static inline void ip6_rt_put(struct rt6_info *rt) +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -150,11 +150,23 @@ static struct fib6_node *node_alloc(void + return fn; + } + +-static void node_free(struct fib6_node *fn) ++static void node_free_immediate(struct fib6_node *fn) ++{ ++ kmem_cache_free(fib6_node_kmem, fn); ++} ++ ++static void node_free_rcu(struct rcu_head *head) + { ++ struct fib6_node *fn = container_of(head, struct fib6_node, rcu); ++ + kmem_cache_free(fib6_node_kmem, fn); + } + ++static void node_free(struct fib6_node *fn) ++{ ++ call_rcu(&fn->rcu, node_free_rcu); ++} ++ + static void rt6_rcu_free(struct rt6_info *rt) + { + call_rcu(&rt->dst.rcu_head, dst_rcu_free); +@@ -588,9 +600,9 @@ insert_above: + + if (!in || !ln) { + if (in) +- node_free(in); ++ node_free_immediate(in); + if (ln) +- node_free(ln); ++ node_free_immediate(ln); + return ERR_PTR(-ENOMEM); + } + +@@ -1015,7 +1027,7 @@ int fib6_add(struct fib6_node *root, str + root, and then (in failure) stale node + in main tree. 
+ */ +- node_free(sfn); ++ node_free_immediate(sfn); + err = PTR_ERR(sn); + goto failure; + } +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1248,7 +1248,9 @@ static void rt6_dst_from_metrics_check(s + + static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) + { +- if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) ++ u32 rt_cookie; ++ ++ if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) + return NULL; + + if (rt6_check_expired(rt)) +@@ -1316,8 +1318,14 @@ static void ip6_link_failure(struct sk_b + if (rt->rt6i_flags & RTF_CACHE) { + dst_hold(&rt->dst); + ip6_del_rt(rt); +- } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { +- rt->rt6i_node->fn_sernum = -1; ++ } else { ++ struct fib6_node *fn; ++ ++ rcu_read_lock(); ++ fn = rcu_dereference(rt->rt6i_node); ++ if (fn && (rt->rt6i_flags & RTF_DEFAULT)) ++ fn->fn_sernum = -1; ++ rcu_read_unlock(); + } + } + } diff --git a/queue-4.4/ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch b/queue-4.4/ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch new file mode 100644 index 00000000000..250be6cdea9 --- /dev/null +++ b/queue-4.4/ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch @@ -0,0 +1,76 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Sabrina Dubroca +Date: Fri, 8 Sep 2017 10:26:19 +0200 +Subject: ipv6: fix memory leak with multiple tables during netns destruction + +From: Sabrina Dubroca + + +[ Upstream commit ba1cc08d9488c94cb8d94f545305688b72a2a300 ] + +fib6_net_exit only frees the main and local tables. If another table was +created with fib6_alloc_table, we leak it when the netns is destroyed. + +Fix this in the same way ip_fib_net_exit cleans up tables, by walking +through the whole hashtable of fib6_table's. We can get rid of the +special cases for local and main, since they're also part of the +hashtable. 
+ +Reproducer: + ip netns add x + ip -net x -6 rule add from 6003:1::/64 table 100 + ip netns del x + +Reported-by: Jianlin Shi +Fixes: 58f09b78b730 ("[NETNS][IPV6] ip6_fib - make it per network namespace") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_fib.c | 25 +++++++++++++++++++------ + 1 file changed, 19 insertions(+), 6 deletions(-) + +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -203,6 +203,12 @@ static void rt6_release(struct rt6_info + } + } + ++static void fib6_free_table(struct fib6_table *table) ++{ ++ inetpeer_invalidate_tree(&table->tb6_peers); ++ kfree(table); ++} ++ + static void fib6_link_table(struct net *net, struct fib6_table *tb) + { + unsigned int h; +@@ -1885,15 +1891,22 @@ out_timer: + + static void fib6_net_exit(struct net *net) + { ++ unsigned int i; ++ + rt6_ifdown(net, NULL); + del_timer_sync(&net->ipv6.ip6_fib_timer); + +-#ifdef CONFIG_IPV6_MULTIPLE_TABLES +- inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); +- kfree(net->ipv6.fib6_local_tbl); +-#endif +- inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); +- kfree(net->ipv6.fib6_main_tbl); ++ for (i = 0; i < FIB_TABLE_HASHSZ; i++) { ++ struct hlist_head *head = &net->ipv6.fib_table_hash[i]; ++ struct hlist_node *tmp; ++ struct fib6_table *tb; ++ ++ hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) { ++ hlist_del(&tb->tb6_hlist); ++ fib6_free_table(tb); ++ } ++ } ++ + kfree(net->ipv6.fib_table_hash); + kfree(net->ipv6.rt6_stats); + } diff --git a/queue-4.4/ipv6-fix-sparse-warning-on-rt6i_node.patch b/queue-4.4/ipv6-fix-sparse-warning-on-rt6i_node.patch new file mode 100644 index 00000000000..2cf9f6e2b6a --- /dev/null +++ b/queue-4.4/ipv6-fix-sparse-warning-on-rt6i_node.patch @@ -0,0 +1,110 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Wei Wang +Date: Fri, 25 Aug 2017 15:03:10 -0700 +Subject: ipv6: fix sparse warning on rt6i_node + +From: Wei Wang + + +[ Upstream commit 
4e587ea71bf924f7dac621f1351653bd41e446cb ] + +Commit c5cff8561d2d adds rcu grace period before freeing fib6_node. This +generates a new sparse warning on rt->rt6i_node related code: + net/ipv6/route.c:1394:30: error: incompatible types in comparison + expression (different address spaces) + ./include/net/ip6_fib.h:187:14: error: incompatible types in comparison + expression (different address spaces) + +This commit adds "__rcu" tag for rt6i_node and makes sure corresponding +rcu API is used for it. +After this fix, sparse no longer generates the above warning. + +Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node") +Signed-off-by: Wei Wang +Acked-by: Eric Dumazet +Acked-by: Martin KaFai Lau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_fib.h | 2 +- + net/ipv6/addrconf.c | 2 +- + net/ipv6/ip6_fib.c | 11 +++++++---- + net/ipv6/route.c | 3 ++- + 4 files changed, 11 insertions(+), 7 deletions(-) + +--- a/include/net/ip6_fib.h ++++ b/include/net/ip6_fib.h +@@ -103,7 +103,7 @@ struct rt6_info { + * the same cache line. 
+ */ + struct fib6_table *rt6i_table; +- struct fib6_node *rt6i_node; ++ struct fib6_node __rcu *rt6i_node; + + struct in6_addr rt6i_gateway; + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -5152,7 +5152,7 @@ static void __ipv6_ifa_notify(int event, + * our DAD process, so we don't need + * to do it again + */ +- if (!(ifp->rt->rt6i_node)) ++ if (!rcu_access_pointer(ifp->rt->rt6i_node)) + ip6_ins_rt(ifp->rt); + if (ifp->idev->cnf.forwarding) + addrconf_join_anycast(ifp); +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -869,7 +869,7 @@ add: + + rt->dst.rt6_next = iter; + *ins = rt; +- rt->rt6i_node = fn; ++ rcu_assign_pointer(rt->rt6i_node, fn); + atomic_inc(&rt->rt6i_ref); + inet6_rt_notify(RTM_NEWROUTE, rt, info, 0); + info->nl_net->ipv6.rt6_stats->fib_rt_entries++; +@@ -894,7 +894,7 @@ add: + return err; + + *ins = rt; +- rt->rt6i_node = fn; ++ rcu_assign_pointer(rt->rt6i_node, fn); + rt->dst.rt6_next = iter->dst.rt6_next; + atomic_inc(&rt->rt6i_ref); + inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); +@@ -1454,8 +1454,9 @@ static void fib6_del_route(struct fib6_n + + int fib6_del(struct rt6_info *rt, struct nl_info *info) + { ++ struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, ++ lockdep_is_held(&rt->rt6i_table->tb6_lock)); + struct net *net = info->nl_net; +- struct fib6_node *fn = rt->rt6i_node; + struct rt6_info **rtp; + + #if RT6_DEBUG >= 2 +@@ -1644,7 +1645,9 @@ static int fib6_clean_node(struct fib6_w + if (res) { + #if RT6_DEBUG >= 2 + pr_debug("%s: del failed: rt=%p@%p err=%d\n", +- __func__, rt, rt->rt6i_node, res); ++ __func__, rt, ++ rcu_access_pointer(rt->rt6i_node), ++ res); + #endif + continue; + } +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1342,7 +1342,8 @@ static void rt6_do_update_pmtu(struct rt + static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) + { + return !(rt->rt6i_flags & RTF_CACHE) && +- (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node); ++ (rt->rt6i_flags & RTF_PCPU || ++ 
rcu_access_pointer(rt->rt6i_node)); + } + + static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, diff --git a/queue-4.4/ipv6-fix-typo-in-fib6_net_exit.patch b/queue-4.4/ipv6-fix-typo-in-fib6_net_exit.patch new file mode 100644 index 00000000000..16620a43bc4 --- /dev/null +++ b/queue-4.4/ipv6-fix-typo-in-fib6_net_exit.patch @@ -0,0 +1,31 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Eric Dumazet +Date: Fri, 8 Sep 2017 15:48:47 -0700 +Subject: ipv6: fix typo in fib6_net_exit() + +From: Eric Dumazet + + +[ Upstream commit 32a805baf0fb70b6dbedefcd7249ac7f580f9e3b ] + +IPv6 FIB should use FIB6_TABLE_HASHSZ, not FIB_TABLE_HASHSZ. + +Fixes: ba1cc08d9488 ("ipv6: fix memory leak with multiple tables during netns destruction") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_fib.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -1896,7 +1896,7 @@ static void fib6_net_exit(struct net *ne + rt6_ifdown(net, NULL); + del_timer_sync(&net->ipv6.ip6_fib_timer); + +- for (i = 0; i < FIB_TABLE_HASHSZ; i++) { ++ for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[i]; + struct hlist_node *tmp; + struct fib6_table *tb; diff --git a/queue-4.4/qlge-avoid-memcpy-buffer-overflow.patch b/queue-4.4/qlge-avoid-memcpy-buffer-overflow.patch new file mode 100644 index 00000000000..fb239207df6 --- /dev/null +++ b/queue-4.4/qlge-avoid-memcpy-buffer-overflow.patch @@ -0,0 +1,44 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Arnd Bergmann +Date: Wed, 23 Aug 2017 15:59:49 +0200 +Subject: qlge: avoid memcpy buffer overflow + +From: Arnd Bergmann + + +[ Upstream commit e58f95831e7468d25eb6e41f234842ecfe6f014f ] + +gcc-8.0.0 (snapshot) points out that we copy a variable-length string +into a fixed length field using memcpy() with the destination length, +and that ends up copying whatever 
follows the string: + + inlined from 'ql_core_dump' at drivers/net/ethernet/qlogic/qlge/qlge_dbg.c:1106:2: +drivers/net/ethernet/qlogic/qlge/qlge_dbg.c:708:2: error: 'memcpy' reading 15 bytes from a region of size 14 [-Werror=stringop-overflow=] + memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); + +Changing it to use strncpy() will instead zero-pad the destination, +which seems to be the right thing to do here. + +The bug is probably harmless, but it seems like a good idea to address +it in stable kernels as well, if only for the purpose of building with +gcc-8 without warnings. + +Fixes: a61f80261306 ("qlge: Add ethtool register dump function.") +Signed-off-by: Arnd Bergmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qlge/qlge_dbg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c ++++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c +@@ -724,7 +724,7 @@ static void ql_build_coredump_seg_header + seg_hdr->cookie = MPI_COREDUMP_COOKIE; + seg_hdr->segNum = seg_number; + seg_hdr->segSize = seg_size; +- memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); ++ strncpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); + } + + /* diff --git a/queue-4.4/revert-net-fix-percpu-memory-leaks.patch b/queue-4.4/revert-net-fix-percpu-memory-leaks.patch new file mode 100644 index 00000000000..345bedfa6dd --- /dev/null +++ b/queue-4.4/revert-net-fix-percpu-memory-leaks.patch @@ -0,0 +1,151 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Jesper Dangaard Brouer +Date: Fri, 1 Sep 2017 11:26:13 +0200 +Subject: Revert "net: fix percpu memory leaks" + +From: Jesper Dangaard Brouer + + +[ Upstream commit 5a63643e583b6a9789d7a225ae076fb4e603991c ] + +This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb. 
+ +After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API +for fragmentation mem accounting") then there is no need for this +fix-up patch. As percpu_counter is no longer used, it cannot +leak memory any longer. + +Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") +Fixes: 1d6119baf061 ("net: fix percpu memory leaks") +Signed-off-by: Jesper Dangaard Brouer +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_frag.h | 7 +------ + net/ieee802154/6lowpan/reassembly.c | 11 +++-------- + net/ipv4/ip_fragment.c | 12 +++--------- + net/ipv6/netfilter/nf_conntrack_reasm.c | 12 +++--------- + net/ipv6/reassembly.c | 12 +++--------- + 5 files changed, 13 insertions(+), 41 deletions(-) + +--- a/include/net/inet_frag.h ++++ b/include/net/inet_frag.h +@@ -103,15 +103,10 @@ struct inet_frags { + int inet_frags_init(struct inet_frags *); + void inet_frags_fini(struct inet_frags *); + +-static inline int inet_frags_init_net(struct netns_frags *nf) ++static inline void inet_frags_init_net(struct netns_frags *nf) + { + atomic_set(&nf->mem, 0); +- return 0; + } +-static inline void inet_frags_uninit_net(struct netns_frags *nf) +-{ +-} +- + void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); + + void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); +--- a/net/ieee802154/6lowpan/reassembly.c ++++ b/net/ieee802154/6lowpan/reassembly.c +@@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_ + { + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); +- int res; + + ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; + ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; + +- res = inet_frags_init_net(&ieee802154_lowpan->frags); +- if (res) +- return res; +- res = lowpan_frags_ns_sysctl_register(net); +- if (res) +- 
inet_frags_uninit_net(&ieee802154_lowpan->frags); +- return res; ++ inet_frags_init_net(&ieee802154_lowpan->frags); ++ ++ return lowpan_frags_ns_sysctl_register(net); + } + + static void __net_exit lowpan_frags_exit_net(struct net *net) +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -840,8 +840,6 @@ static void __init ip4_frags_ctl_registe + + static int __net_init ipv4_frags_init_net(struct net *net) + { +- int res; +- + /* Fragment cache limits. + * + * The fragment memory accounting code, (tries to) account for +@@ -865,13 +863,9 @@ static int __net_init ipv4_frags_init_ne + */ + net->ipv4.frags.timeout = IP_FRAG_TIME; + +- res = inet_frags_init_net(&net->ipv4.frags); +- if (res) +- return res; +- res = ip4_frags_ns_ctl_register(net); +- if (res) +- inet_frags_uninit_net(&net->ipv4.frags); +- return res; ++ inet_frags_init_net(&net->ipv4.frags); ++ ++ return ip4_frags_ns_ctl_register(net); + } + + static void __net_exit ipv4_frags_exit_net(struct net *net) +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -649,18 +649,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_or + + static int nf_ct_net_init(struct net *net) + { +- int res; +- + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; +- res = inet_frags_init_net(&net->nf_frag.frags); +- if (res) +- return res; +- res = nf_ct_frag6_sysctl_register(net); +- if (res) +- inet_frags_uninit_net(&net->nf_frag.frags); +- return res; ++ inet_frags_init_net(&net->nf_frag.frags); ++ ++ return nf_ct_frag6_sysctl_register(net); + } + + static void nf_ct_net_exit(struct net *net) +--- a/net/ipv6/reassembly.c ++++ b/net/ipv6/reassembly.c +@@ -708,19 +708,13 @@ static void ip6_frags_sysctl_unregister( + + static int __net_init ipv6_frags_init_net(struct net *net) + { +- int res; +- + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + 
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; + +- res = inet_frags_init_net(&net->ipv6.frags); +- if (res) +- return res; +- res = ip6_frags_ns_sysctl_register(net); +- if (res) +- inet_frags_uninit_net(&net->ipv6.frags); +- return res; ++ inet_frags_init_net(&net->ipv6.frags); ++ ++ return ip6_frags_ns_sysctl_register(net); + } + + static void __net_exit ipv6_frags_exit_net(struct net *net) diff --git a/queue-4.4/revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch b/queue-4.4/revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch new file mode 100644 index 00000000000..6caaa09e3c9 --- /dev/null +++ b/queue-4.4/revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch @@ -0,0 +1,104 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Florian Fainelli +Date: Wed, 30 Aug 2017 17:49:29 -0700 +Subject: Revert "net: phy: Correctly process PHY_HALTED in phy_stop_machine()" + +From: Florian Fainelli + + +[ Upstream commit ebc8254aeae34226d0bc8fda309fd9790d4dccfe ] + +This reverts commit 7ad813f208533cebfcc32d3d7474dc1677d1b09a ("net: phy: +Correctly process PHY_HALTED in phy_stop_machine()") because it is +creating the possibility for a NULL pointer dereference. + +David Daney provide the following call trace and diagram of events: + +When ndo_stop() is called we call: + + phy_disconnect() + +---> phy_stop_interrupts() implies: phydev->irq = PHY_POLL; + +---> phy_stop_machine() + | +---> phy_state_machine() + | +----> queue_delayed_work(): Work queued. + +--->phy_detach() implies: phydev->attached_dev = NULL; + +Now at a later time the queued work does: + + phy_state_machine() + +---->netif_carrier_off(phydev->attached_dev): Oh no! 
It is NULL: + + CPU 12 Unable to handle kernel paging request at virtual address +0000000000000048, epc == ffffffff80de37ec, ra == ffffffff80c7c +Oops[#1]: +CPU: 12 PID: 1502 Comm: kworker/12:1 Not tainted 4.9.43-Cavium-Octeon+ #1 +Workqueue: events_power_efficient phy_state_machine +task: 80000004021ed100 task.stack: 8000000409d70000 +$ 0 : 0000000000000000 ffffffff84720060 0000000000000048 0000000000000004 +$ 4 : 0000000000000000 0000000000000001 0000000000000004 0000000000000000 +$ 8 : 0000000000000000 0000000000000000 00000000ffff98f3 0000000000000000 +$12 : 8000000409d73fe0 0000000000009c00 ffffffff846547c8 000000000000af3b +$16 : 80000004096bab68 80000004096babd0 0000000000000000 80000004096ba800 +$20 : 0000000000000000 0000000000000000 ffffffff81090000 0000000000000008 +$24 : 0000000000000061 ffffffff808637b0 +$28 : 8000000409d70000 8000000409d73cf0 80000000271bd300 ffffffff80c7804c +Hi : 000000000000002a +Lo : 000000000000003f +epc : ffffffff80de37ec netif_carrier_off+0xc/0x58 +ra : ffffffff80c7804c phy_state_machine+0x48c/0x4f8 +Status: 14009ce3 KX SX UX KERNEL EXL IE +Cause : 00800008 (ExcCode 02) +BadVA : 0000000000000048 +PrId : 000d9501 (Cavium Octeon III) +Modules linked in: +Process kworker/12:1 (pid: 1502, threadinfo=8000000409d70000, +task=80000004021ed100, tls=0000000000000000) +Stack : 8000000409a54000 80000004096bab68 80000000271bd300 80000000271c1e00 + 0000000000000000 ffffffff808a1708 8000000409a54000 80000000271bd300 + 80000000271bd320 8000000409a54030 ffffffff80ff0f00 0000000000000001 + ffffffff81090000 ffffffff808a1ac0 8000000402182080 ffffffff84650000 + 8000000402182080 ffffffff84650000 ffffffff80ff0000 8000000409a54000 + ffffffff808a1970 0000000000000000 80000004099e8000 8000000402099240 + 0000000000000000 ffffffff808a8598 0000000000000000 8000000408eeeb00 + 8000000409a54000 00000000810a1d00 0000000000000000 8000000409d73de8 + 8000000409d73de8 0000000000000088 000000000c009c00 8000000409d73e08 + 8000000409d73e08 8000000402182080 
ffffffff808a84d0 8000000402182080 + ... +Call Trace: +[] netif_carrier_off+0xc/0x58 +[] phy_state_machine+0x48c/0x4f8 +[] process_one_work+0x158/0x368 +[] worker_thread+0x150/0x4c0 +[] kthread+0xc8/0xe0 +[] ret_from_kernel_thread+0x14/0x1c + +The original motivation for this change originated from Marc Gonzales +indicating that his network driver did not have its adjust_link callback +executing with phydev->link = 0 while he was expecting it. + +PHYLIB has never made any such guarantees ever because phy_stop() merely just +tells the workqueue to move into PHY_HALTED state which will happen +asynchronously. + +Reported-by: Geert Uytterhoeven +Reported-by: David Daney +Fixes: 7ad813f20853 ("net: phy: Correctly process PHY_HALTED in phy_stop_machine()") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/drivers/net/phy/phy.c ++++ b/drivers/net/phy/phy.c +@@ -541,9 +541,6 @@ void phy_stop_machine(struct phy_device + if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) + phydev->state = PHY_UP; + mutex_unlock(&phydev->lock); +- +- /* Now we can run the state machine synchronously */ +- phy_state_machine(&phydev->state_queue.work); + } + + /** diff --git a/queue-4.4/revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch b/queue-4.4/revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch new file mode 100644 index 00000000000..fa409d8d525 --- /dev/null +++ b/queue-4.4/revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch @@ -0,0 +1,147 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Jesper Dangaard Brouer +Date: Fri, 1 Sep 2017 11:26:08 +0200 +Subject: Revert "net: use lib/percpu_counter API for fragmentation mem accounting" + +From: Jesper Dangaard Brouer + + +[ Upstream commit fb452a1aa3fd4034d7999e309c5466ff2d7005aa ] + +This reverts commit 
6d7b857d541ecd1d9bd997c97242d4ef94b19de2. + +There is a bug in the fragmentation code's use of the percpu_counter API, +that can cause issues on systems with many CPUs. + +The frag_mem_limit() just reads the global counter (fbc->count), +without considering other CPUs can have up to batch size (130K) that +haven't been subtracted yet. Due to the 3MBytes lower thresh limit, +this becomes dangerous at >=24 CPUs (3*1024*1024/130000=24). + +The correct API usage would be to use __percpu_counter_compare() which +does the right thing, and takes into account the number of (online) +CPUs and batch size, to account for this and call __percpu_counter_sum() +when needed. + +We choose to revert the use of the lib/percpu_counter API for frag +memory accounting for several reasons: + +1) On systems with CPUs > 24, the heavier fully locked + __percpu_counter_sum() is always invoked, which will be more + expensive than the atomic_t that is reverted to. + +Given systems with more than 24 CPUs are becoming common this doesn't +seem like a good option. To mitigate this, the batch size could be +decreased and thresh be increased. + +2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX + CPU, before SKBs are pushed into sockets on remote CPUs. Given + NICs can only hash on L2 part of the IP-header, the NIC-RXq's will + likely be limited. Thus, a fair chance that atomic add+dec happen + on the same CPU. + +Revert note that commit 1d6119baf061 ("net: fix percpu memory leaks") +removed init_frag_mem_limit() and instead uses inet_frags_init_net(). +After this revert, inet_frags_uninit_net() becomes empty. + +Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") +Fixes: 1d6119baf061 ("net: fix percpu memory leaks") +Signed-off-by: Jesper Dangaard Brouer +Acked-by: Florian Westphal +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_frag.h | 36 +++++++++--------------------------- + net/ipv4/inet_fragment.c | 4 +--- + 2 files changed, 10 insertions(+), 30 deletions(-) + +--- a/include/net/inet_frag.h ++++ b/include/net/inet_frag.h +@@ -1,14 +1,9 @@ + #ifndef __NET_FRAG_H__ + #define __NET_FRAG_H__ + +-#include <linux/percpu_counter.h> +- + struct netns_frags { +- /* The percpu_counter "mem" need to be cacheline aligned. +- * mem.count must not share cacheline with other writers +- */ +- struct percpu_counter mem ____cacheline_aligned_in_smp; +- ++ /* Keep atomic mem on separate cachelines in structs that include it */ ++ atomic_t mem ____cacheline_aligned_in_smp; + /* sysctls */ + int timeout; + int high_thresh; +@@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags * + + static inline int inet_frags_init_net(struct netns_frags *nf) + { +- return percpu_counter_init(&nf->mem, 0, GFP_KERNEL); ++ atomic_set(&nf->mem, 0); ++ return 0; + } + static inline void inet_frags_uninit_net(struct netns_frags *nf) + { +- percpu_counter_destroy(&nf->mem); + } + + void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); +@@ -140,37 +135,24 @@ static inline bool inet_frag_evicting(st + + /* Memory Tracking Functions. */ + +-/* The default percpu_counter batch size is not big enough to scale to +- * fragmentation mem acct sizes. 
+- * The mem size of a 64K fragment is approx: +- * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes +- */ +-static unsigned int frag_percpu_counter_batch = 130000; +- + static inline int frag_mem_limit(struct netns_frags *nf) + { +- return percpu_counter_read(&nf->mem); ++ return atomic_read(&nf->mem); + } + + static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) + { +- __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); ++ atomic_sub(i, &nf->mem); + } + + static inline void add_frag_mem_limit(struct netns_frags *nf, int i) + { +- __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); ++ atomic_add(i, &nf->mem); + } + +-static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) ++static inline int sum_frag_mem_limit(struct netns_frags *nf) + { +- unsigned int res; +- +- local_bh_disable(); +- res = percpu_counter_sum_positive(&nf->mem); +- local_bh_enable(); +- +- return res; ++ return atomic_read(&nf->mem); + } + + /* RFC 3168 support : +--- a/net/ipv4/inet_fragment.c ++++ b/net/ipv4/inet_fragment.c +@@ -234,10 +234,8 @@ evict_again: + cond_resched(); + + if (read_seqretry(&f->rnd_seqlock, seq) || +- percpu_counter_sum(&nf->mem)) ++ sum_frag_mem_limit(nf)) + goto evict_again; +- +- percpu_counter_destroy(&nf->mem); + } + EXPORT_SYMBOL(inet_frags_exit_net); + diff --git a/queue-4.4/series b/queue-4.4/series new file mode 100644 index 00000000000..f15a5b625a5 --- /dev/null +++ b/queue-4.4/series @@ -0,0 +1,12 @@ +ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch +ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch +ipv6-fix-sparse-warning-on-rt6i_node.patch +qlge-avoid-memcpy-buffer-overflow.patch +revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch +tcp-initialize-rcv_mss-to-tcp_min_mss-instead-of-0.patch +revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch +revert-net-fix-percpu-memory-leaks.patch 
+gianfar-fix-tx-flow-control-deactivation.patch +ip6_gre-update-mtu-properly-in-ip6gre_err.patch +ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch +ipv6-fix-typo-in-fib6_net_exit.patch diff --git a/queue-4.4/tcp-initialize-rcv_mss-to-tcp_min_mss-instead-of-0.patch b/queue-4.4/tcp-initialize-rcv_mss-to-tcp_min_mss-instead-of-0.patch new file mode 100644 index 00000000000..1baa27908fc --- /dev/null +++ b/queue-4.4/tcp-initialize-rcv_mss-to-tcp_min_mss-instead-of-0.patch @@ -0,0 +1,40 @@ +From foo@baz Fri Sep 15 10:03:45 PDT 2017 +From: Wei Wang +Date: Thu, 18 May 2017 11:22:33 -0700 +Subject: tcp: initialize rcv_mss to TCP_MIN_MSS instead of 0 + +From: Wei Wang + + +[ Upstream commit 499350a5a6e7512d9ed369ed63a4244b6536f4f8 ] + +When tcp_disconnect() is called, inet_csk_delack_init() sets +icsk->icsk_ack.rcv_mss to 0. +This could potentially cause tcp_recvmsg() => tcp_cleanup_rbuf() => +__tcp_select_window() call path to have division by 0 issue. +So this patch initializes rcv_mss to TCP_MIN_MSS instead of 0. + +Reported-by: Andrey Konovalov +Signed-off-by: Wei Wang +Signed-off-by: Eric Dumazet +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2260,6 +2260,10 @@ int tcp_disconnect(struct sock *sk, int + tcp_set_ca_state(sk, TCP_CA_Open); + tcp_clear_retrans(tp); + inet_csk_delack_init(sk); ++ /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 ++ * issue in __tcp_select_window() ++ */ ++ icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; + tcp_init_send_head(sk); + memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); + __sk_dst_reset(sk);