From: Greg Kroah-Hartman Date: Wed, 11 Sep 2013 05:18:53 +0000 (-0700) Subject: 3.0-stable patches X-Git-Tag: v3.0.96~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3b059fd87593128199a71d0e6ddce71b30a65cd0;p=thirdparty%2Fkernel%2Fstable-queue.git 3.0-stable patches added patches: af_key-initialize-satype-in-key_notify_policy_flush.patch fib_trie-remove-potential-out-of-bound-access.patch htb-fix-sign-extension-bug.patch icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch ipv6-drop-packets-with-multiple-fragmentation-headers.patch ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch net-check-net.core.somaxconn-sysctl-values.patch tcp-cubic-fix-bug-in-bictcp_acked.patch tcp-cubic-fix-overflow-error-in-bictcp_update.patch tipc-fix-lockdep-warning-during-bearer-initialization.patch tun-signedness-bug-in-tun_get_user.patch --- diff --git a/queue-3.0/af_key-initialize-satype-in-key_notify_policy_flush.patch b/queue-3.0/af_key-initialize-satype-in-key_notify_policy_flush.patch new file mode 100644 index 00000000000..328b545bb9a --- /dev/null +++ b/queue-3.0/af_key-initialize-satype-in-key_notify_policy_flush.patch @@ -0,0 +1,29 @@ +From ca04998d1812ee7cdc7b30077df4c05dd835487c Mon Sep 17 00:00:00 2001 +From: Nicolas Dichtel +Date: Mon, 18 Feb 2013 16:24:20 +0100 +Subject: af_key: initialize satype in key_notify_policy_flush() + +From: Nicolas Dichtel + +[ Upstream commit 85dfb745ee40232876663ae206cba35f24ab2a40 ] + +This field was left uninitialized. Some user daemons perform check against this +field. 
+ +Signed-off-by: Nicolas Dichtel +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + net/key/af_key.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/key/af_key.c ++++ b/net/key/af_key.c +@@ -1704,6 +1704,7 @@ static int key_notify_sa_flush(const str + hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_version = PF_KEY_V2; + hdr->sadb_msg_errno = (uint8_t) 0; ++ hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; + hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); + hdr->sadb_msg_reserved = 0; + diff --git a/queue-3.0/fib_trie-remove-potential-out-of-bound-access.patch b/queue-3.0/fib_trie-remove-potential-out-of-bound-access.patch new file mode 100644 index 00000000000..cd2636c64fd --- /dev/null +++ b/queue-3.0/fib_trie-remove-potential-out-of-bound-access.patch @@ -0,0 +1,51 @@ +From 639d0c1b07b0dc3bf5be5e799ede96d72a16ce3d Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 5 Aug 2013 11:18:49 -0700 +Subject: fib_trie: remove potential out of bound access + +From: Eric Dumazet + +[ Upstream commit aab515d7c32a34300312416c50314e755ea6f765 ] + +AddressSanitizer [1] dynamic checker pointed a potential +out of bound access in leaf_walk_rcu() + +We could allocate one more slot in tnode_new() to leave the prefetch() +in-place but it looks not worth the pain. + +Bug added in commit 82cfbb008572b ("[IPV4] fib_trie: iterator recode") + +[1] : +https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel + +Reported-by: Andrey Konovalov +Signed-off-by: Eric Dumazet +Cc: Dmitry Vyukov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_trie.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -72,7 +72,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -1772,10 +1771,8 @@ static struct leaf *leaf_walk_rcu(struct + if (!c) + continue; + +- if (IS_LEAF(c)) { +- prefetch(rcu_dereference_rtnl(p->child[idx])); ++ if (IS_LEAF(c)) + return (struct leaf *) c; +- } + + /* Rescan start scanning in new node */ + p = (struct tnode *) c; diff --git a/queue-3.0/htb-fix-sign-extension-bug.patch b/queue-3.0/htb-fix-sign-extension-bug.patch new file mode 100644 index 00000000000..19e21189760 --- /dev/null +++ b/queue-3.0/htb-fix-sign-extension-bug.patch @@ -0,0 +1,39 @@ +From 1a1756eb58003653cbdf29d88e7402006ab7e2ff Mon Sep 17 00:00:00 2001 +From: stephen hemminger +Date: Thu, 1 Aug 2013 22:32:07 -0700 +Subject: htb: fix sign extension bug + +From: stephen hemminger + +[ Upstream commit cbd375567f7e4811b1c721f75ec519828ac6583f ] + +When userspace passes a large priority value +the assignment of the unsigned value hopt->prio +to signed int cl->prio causes cl->prio to become negative and the +comparison is with TC_HTB_NUMPRIO is always false. + +The result is that HTB crashes by referencing outside +the array when processing packets. With this patch the large value +wraps around like other values outside the normal range. + +See: https://bugzilla.kernel.org/show_bug.cgi?id=60669 + +Signed-off-by: Stephen Hemminger +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_htb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sched/sch_htb.c ++++ b/net/sched/sch_htb.c +@@ -86,7 +86,7 @@ struct htb_class { + unsigned int children; + struct htb_class *parent; /* parent class */ + +- int prio; /* these two are used only by leaves... 
*/ ++ u32 prio; /* these two are used only by leaves... */ + int quantum; /* but stored for parent-to-leaf return */ + + union { diff --git a/queue-3.0/icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch b/queue-3.0/icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch new file mode 100644 index 00000000000..cb3f8f51a6a --- /dev/null +++ b/queue-3.0/icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch @@ -0,0 +1,72 @@ +From 41c270f2e8505aedf59380748f9fe07fc8c6eb78 Mon Sep 17 00:00:00 2001 +From: Jiri Bohac +Date: Fri, 30 Aug 2013 11:18:45 +0200 +Subject: ICMPv6: treat dest unreachable codes 5 and 6 as EACCES, not EPROTO + +From: Jiri Bohac + +[ Upstream commit 61e76b178dbe7145e8d6afa84bb4ccea71918994 ] + +RFC 4443 has defined two additional codes for ICMPv6 type 1 (destination +unreachable) messages: + 5 - Source address failed ingress/egress policy + 6 - Reject route to destination + +Now they are treated as protocol error and icmpv6_err_convert() converts them +to EPROTO. + +RFC 4443 says: + "Codes 5 and 6 are more informative subsets of code 1." + +Treat codes 5 and 6 as code 1 (EACCES) + +Btw, connect() returning -EPROTO confuses firefox, so that fallback to +other/IPv4 addresses does not work: +https://bugzilla.mozilla.org/show_bug.cgi?id=910773 + +Signed-off-by: Jiri Bohac +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/icmpv6.h | 2 ++ + net/ipv6/icmp.c | 10 +++++++++- + 2 files changed, 11 insertions(+), 1 deletion(-) + +--- a/include/linux/icmpv6.h ++++ b/include/linux/icmpv6.h +@@ -123,6 +123,8 @@ static inline struct icmp6hdr *icmp6_hdr + #define ICMPV6_NOT_NEIGHBOUR 2 + #define ICMPV6_ADDR_UNREACH 3 + #define ICMPV6_PORT_UNREACH 4 ++#define ICMPV6_POLICY_FAIL 5 ++#define ICMPV6_REJECT_ROUTE 6 + + /* + * Codes for Time Exceeded +--- a/net/ipv6/icmp.c ++++ b/net/ipv6/icmp.c +@@ -917,6 +917,14 @@ static const struct icmp6_err { + .err = ECONNREFUSED, + .fatal = 1, + }, ++ { /* POLICY_FAIL */ ++ .err = EACCES, ++ .fatal = 1, ++ }, ++ { /* REJECT_ROUTE */ ++ .err = EACCES, ++ .fatal = 1, ++ }, + }; + + int icmpv6_err_convert(u8 type, u8 code, int *err) +@@ -928,7 +936,7 @@ int icmpv6_err_convert(u8 type, u8 code, + switch (type) { + case ICMPV6_DEST_UNREACH: + fatal = 1; +- if (code <= ICMPV6_PORT_UNREACH) { ++ if (code < ARRAY_SIZE(tab_unreach)) { + *err = tab_unreach[code].err; + fatal = tab_unreach[code].fatal; + } diff --git a/queue-3.0/ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch b/queue-3.0/ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch new file mode 100644 index 00000000000..8c61b1379c4 --- /dev/null +++ b/queue-3.0/ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch @@ -0,0 +1,81 @@ +From 3fb47cb1a738f43c4d7b0f7497978d27575df804 Mon Sep 17 00:00:00 2001 +From: Thomas Graf +Date: Tue, 3 Sep 2013 13:37:01 +0200 +Subject: ipv6: Don't depend on per socket memory for neighbour discovery messages + +From: Thomas Graf + +[ Upstream commit 25a6e6b84fba601eff7c28d30da8ad7cfbef0d43 ] + +Allocating skbs when sending out neighbour discovery messages +currently uses sock_alloc_send_skb() based on a per net namespace +socket and thus share a socket wmem buffer space. 
+ +If a netdevice is temporarily unable to transmit due to carrier +loss or for other reasons, the queued up ndisc messages will consume +all of the wmem space and will thus prevent from any more skbs to +be allocated even for netdevices that are able to transmit packets. + +The number of neighbour discovery messages sent is very limited, +use of alloc_skb() bypasses the socket wmem buffer size enforcement +while the manual call to skb_set_owner_w() maintains the socket +reference needed for the IPv6 output path. + +This patch has originally been posted by Eric Dumazet in a modified +form. + +Signed-off-by: Thomas Graf +Cc: Eric Dumazet +Cc: Hannes Frederic Sowa +Cc: Stephen Warren +Cc: Fabio Estevam +Tested-by: Fabio Estevam +Tested-by: Stephen Warren +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ndisc.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +--- a/net/ipv6/ndisc.c ++++ b/net/ipv6/ndisc.c +@@ -456,7 +456,6 @@ struct sk_buff *ndisc_build_skb(struct n + struct sk_buff *skb; + struct icmp6hdr *hdr; + int len; +- int err; + u8 *opt; + + if (!dev->addr_len) +@@ -466,14 +465,12 @@ struct sk_buff *ndisc_build_skb(struct n + if (llinfo) + len += ndisc_opt_addr_space(dev); + +- skb = sock_alloc_send_skb(sk, +- (MAX_HEADER + sizeof(struct ipv6hdr) + +- len + LL_ALLOCATED_SPACE(dev)), +- 1, &err); ++ skb = alloc_skb((MAX_HEADER + sizeof(struct ipv6hdr) + ++ len + LL_ALLOCATED_SPACE(dev)), GFP_ATOMIC); + if (!skb) { + ND_PRINTK0(KERN_ERR +- "ICMPv6 ND: %s() failed to allocate an skb, err=%d.\n", +- __func__, err); ++ "ICMPv6 ND: %s() failed to allocate an skb.\n", ++ __func__); + return NULL; + } + +@@ -501,6 +498,11 @@ struct sk_buff *ndisc_build_skb(struct n + csum_partial(hdr, + len, 0)); + ++ /* Manually assign socket ownership as we avoid calling ++ * sock_alloc_send_pskb() to bypass wmem buffer limits ++ */ ++ skb_set_owner_w(skb, sk); ++ + return skb; + } + diff
--git a/queue-3.0/ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch b/queue-3.0/ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch new file mode 100644 index 00000000000..726a17e7bd7 --- /dev/null +++ b/queue-3.0/ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch @@ -0,0 +1,58 @@ +From 40d36e4b306f4f316723c31758b1ad7a473b3de1 Mon Sep 17 00:00:00 2001 +From: Hannes Frederic Sowa +Date: Wed, 7 Aug 2013 02:34:31 +0200 +Subject: ipv6: don't stop backtracking in fib6_lookup_1 if subtree does not match + +From: Hannes Frederic Sowa + +[ Upstream commit 3e3be275851bc6fc90bfdcd732cd95563acd982b ] + +In case a subtree did not match we currently stop backtracking and return +NULL (root table from fib_lookup). This could yield in invalid routing +table lookups when using subtrees. + +Instead continue to backtrack until a valid subtree or node is found +and return this match. + +Also remove unneeded NULL check. + +Reported-by: Teco Boot +Cc: YOSHIFUJI Hideaki +Cc: David Lamparter +Cc: +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_fib.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -866,14 +866,22 @@ static struct fib6_node * fib6_lookup_1( + + if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { + #ifdef CONFIG_IPV6_SUBTREES +- if (fn->subtree) +- fn = fib6_lookup_1(fn->subtree, args + 1); ++ if (fn->subtree) { ++ struct fib6_node *sfn; ++ sfn = fib6_lookup_1(fn->subtree, ++ args + 1); ++ if (!sfn) ++ goto backtrack; ++ fn = sfn; ++ } + #endif +- if (!fn || fn->fn_flags & RTN_RTINFO) ++ if (fn->fn_flags & RTN_RTINFO) + return fn; + } + } +- ++#ifdef CONFIG_IPV6_SUBTREES ++backtrack: ++#endif + if (fn->fn_flags & RTN_ROOT) + break; + diff --git a/queue-3.0/ipv6-drop-packets-with-multiple-fragmentation-headers.patch b/queue-3.0/ipv6-drop-packets-with-multiple-fragmentation-headers.patch new file mode 100644 index 00000000000..5ac530a7746 --- /dev/null +++ b/queue-3.0/ipv6-drop-packets-with-multiple-fragmentation-headers.patch @@ -0,0 +1,63 @@ +From 499966095ede550db4043ae95550966f855a5605 Mon Sep 17 00:00:00 2001 +From: Hannes Frederic Sowa +Date: Fri, 16 Aug 2013 13:30:07 +0200 +Subject: ipv6: drop packets with multiple fragmentation headers + +From: Hannes Frederic Sowa + +[ Upstream commit f46078cfcd77fa5165bf849f5e568a7ac5fa569c ] + +It is not allowed for an ipv6 packet to contain multiple fragmentation +headers. So discard packets which were already reassembled by +fragmentation logic and send back a parameter problem icmp. + +The updates for RFC 6980 will come in later, I have to do a bit more +research here. + +Cc: YOSHIFUJI Hideaki +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ipv6.h | 1 + + net/ipv6/reassembly.c | 5 +++++ + 2 files changed, 6 insertions(+) + +--- a/include/linux/ipv6.h ++++ b/include/linux/ipv6.h +@@ -255,6 +255,7 @@ struct inet6_skb_parm { + #define IP6SKB_XFRM_TRANSFORMED 1 + #define IP6SKB_FORWARDED 2 + #define IP6SKB_REROUTED 4 ++#define IP6SKB_FRAGMENTED 16 + }; + + #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) +--- a/net/ipv6/reassembly.c ++++ b/net/ipv6/reassembly.c +@@ -516,6 +516,7 @@ static int ip6_frag_reasm(struct frag_qu + head->tstamp = fq->q.stamp; + ipv6_hdr(head)->payload_len = htons(payload_len); + IP6CB(head)->nhoff = nhoff; ++ IP6CB(head)->flags |= IP6SKB_FRAGMENTED; + + /* Yes, and fold redundant checksum back. 8) */ + if (head->ip_summed == CHECKSUM_COMPLETE) +@@ -551,6 +552,9 @@ static int ipv6_frag_rcv(struct sk_buff + const struct ipv6hdr *hdr = ipv6_hdr(skb); + struct net *net = dev_net(skb_dst(skb)->dev); + ++ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) ++ goto fail_hdr; ++ + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); + + /* Jumbo payload inhibits frag. 
header */ +@@ -571,6 +575,7 @@ static int ipv6_frag_rcv(struct sk_buff + ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); + + IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); ++ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; + return 1; + } + diff --git a/queue-3.0/ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch b/queue-3.0/ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch new file mode 100644 index 00000000000..78f654aacd5 --- /dev/null +++ b/queue-3.0/ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch @@ -0,0 +1,64 @@ +From c230bc28420cd649949433d25d67679c0608157a Mon Sep 17 00:00:00 2001 +From: Hannes Frederic Sowa +Date: Fri, 16 Aug 2013 13:02:27 +0200 +Subject: ipv6: remove max_addresses check from ipv6_create_tempaddr + +From: Hannes Frederic Sowa + +[ Upstream commit 4b08a8f1bd8cb4541c93ec170027b4d0782dab52 ] + +Because of the max_addresses check attackers were able to disable privacy +extensions on an interface by creating enough autoconfigured addresses: + + + +But the check is not actually needed: max_addresses protects the +kernel to install too many ipv6 addresses on an interface and guards +addrconf_prefix_rcv to install further addresses as soon as this limit +is reached. We only generate temporary addresses in direct response of +a new address showing up. As soon as we filled up the maximum number of +addresses of an interface, we stop installing more addresses and thus +also stop generating more temp addresses. + +Even if the attacker tries to generate a lot of temporary addresses +by announcing a prefix and removing it again (lifetime == 0) we won't +install more temp addresses, because the temporary addresses do count +to the maximum number of addresses, thus we would stop installing new +autoconfigured addresses when the limit is reached. + +This patch fixes CVE-2013-0343 (but other layer-2 attacks are still +possible). + +Thanks to Ding Tianhong to bring this topic up again. 
+ +Signed-off-by: Hannes Frederic Sowa +Cc: Ding Tianhong +Cc: George Kargiotakis +Cc: P J P +Cc: YOSHIFUJI Hideaki +Acked-by: Ding Tianhong +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -912,12 +912,10 @@ retry: + if (ifp->flags & IFA_F_OPTIMISTIC) + addr_flags |= IFA_F_OPTIMISTIC; + +- ift = !max_addresses || +- ipv6_count_addresses(idev) < max_addresses ? +- ipv6_add_addr(idev, &addr, tmp_plen, +- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, +- addr_flags) : NULL; +- if (!ift || IS_ERR(ift)) { ++ ift = ipv6_add_addr(idev, &addr, tmp_plen, ++ ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, ++ addr_flags); ++ if (IS_ERR(ift)) { + in6_ifa_put(ifp); + in6_dev_put(idev); + printk(KERN_INFO diff --git a/queue-3.0/net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch b/queue-3.0/net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch new file mode 100644 index 00000000000..185df5212bc --- /dev/null +++ b/queue-3.0/net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch @@ -0,0 +1,43 @@ +From 2c67f88fcc48223b7e3f409c346903efe8a41ab6 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Thu, 29 Aug 2013 23:55:05 +0200 +Subject: net: bridge: convert MLDv2 Query MRC into msecs_to_jiffies for max_delay + +From: Daniel Borkmann + +[ Upstream commit 2d98c29b6fb3de44d9eaa73c09f9cf7209346383 ] + +While looking into MLDv1/v2 code, I noticed that bridging code does +not convert its max delay into jiffies for MLDv2 messages as we do +in core IPv6' multicast code. + +RFC3810, 5.1.3. Maximum Response Code says: + + The Maximum Response Code field specifies the maximum time allowed + before sending a responding Report.
The actual time allowed, called + the Maximum Response Delay, is represented in units of milliseconds, + and is derived from the Maximum Response Code as follows: [...] + +As we update timers that work with jiffies, we need to convert it. + +Signed-off-by: Daniel Borkmann +Cc: Linus Lüssing +Cc: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_multicast.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -1155,7 +1155,8 @@ static int br_ip6_multicast_query(struct + mld2q = (struct mld2_query *)icmp6_hdr(skb); + if (!mld2q->mld2q_nsrcs) + group = &mld2q->mld2q_mca; +- max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(mld2q->mld2q_mrc) : 1; ++ ++ max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL); + } + + if (!group) diff --git a/queue-3.0/net-check-net.core.somaxconn-sysctl-values.patch b/queue-3.0/net-check-net.core.somaxconn-sysctl-values.patch new file mode 100644 index 00000000000..c52b0eefbd8 --- /dev/null +++ b/queue-3.0/net-check-net.core.somaxconn-sysctl-values.patch @@ -0,0 +1,70 @@ +From 4581b6d7f4691d00374723890a89776083bab5ce Mon Sep 17 00:00:00 2001 +From: Roman Gushchin +Date: Fri, 2 Aug 2013 18:36:40 +0400 +Subject: net: check net.core.somaxconn sysctl values + +From: Roman Gushchin + +[ Upstream commit 5f671d6b4ec3e6d66c2a868738af2cdea09e7509 ] + +It's possible to assign an invalid value to the net.core.somaxconn +sysctl variable, because there are no checks at all. + +The sk_max_ack_backlog field of the sock structure is defined as +unsigned short. Therefore, the backlog argument in inet_listen() +shouldn't exceed USHRT_MAX. The backlog argument in the listen() syscall +is truncated to the somaxconn value. So, the somaxconn value shouldn't +exceed 65535 (USHRT_MAX). +Also, negative values of somaxconn are meaningless.
+ +before: +$ sysctl -w net.core.somaxconn=256 +net.core.somaxconn = 256 +$ sysctl -w net.core.somaxconn=65536 +net.core.somaxconn = 65536 +$ sysctl -w net.core.somaxconn=-100 +net.core.somaxconn = -100 + +after: +$ sysctl -w net.core.somaxconn=256 +net.core.somaxconn = 256 +$ sysctl -w net.core.somaxconn=65536 +error: "Invalid argument" setting key "net.core.somaxconn" +$ sysctl -w net.core.somaxconn=-100 +error: "Invalid argument" setting key "net.core.somaxconn" + +Based on a prior patch from Changli Gao. + +Signed-off-by: Roman Gushchin +Reported-by: Changli Gao +Suggested-by: Eric Dumazet +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sysctl_net_core.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/core/sysctl_net_core.c ++++ b/net/core/sysctl_net_core.c +@@ -19,6 +19,9 @@ + #include + #include + ++static int zero = 0; ++static int ushort_max = USHRT_MAX; ++ + #ifdef CONFIG_RPS + static int rps_sock_flow_sysctl(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +@@ -192,7 +195,9 @@ static struct ctl_table netns_core_table + .data = &init_net.core.sysctl_somaxconn, + .maxlen = sizeof(int), + .mode = 0644, +- .proc_handler = proc_dointvec ++ .extra1 = &zero, ++ .extra2 = &ushort_max, ++ .proc_handler = proc_dointvec_minmax + }, + { } + }; diff --git a/queue-3.0/series b/queue-3.0/series new file mode 100644 index 00000000000..f3922b68a9b --- /dev/null +++ b/queue-3.0/series @@ -0,0 +1,14 @@ +htb-fix-sign-extension-bug.patch +net-check-net.core.somaxconn-sysctl-values.patch +fib_trie-remove-potential-out-of-bound-access.patch +tcp-cubic-fix-overflow-error-in-bictcp_update.patch +tcp-cubic-fix-bug-in-bictcp_acked.patch +ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch +tun-signedness-bug-in-tun_get_user.patch +ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch 
+ipv6-drop-packets-with-multiple-fragmentation-headers.patch +ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch +net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch +icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch +tipc-fix-lockdep-warning-during-bearer-initialization.patch +af_key-initialize-satype-in-key_notify_policy_flush.patch diff --git a/queue-3.0/tcp-cubic-fix-bug-in-bictcp_acked.patch b/queue-3.0/tcp-cubic-fix-bug-in-bictcp_acked.patch new file mode 100644 index 00000000000..ae0139a2734 --- /dev/null +++ b/queue-3.0/tcp-cubic-fix-bug-in-bictcp_acked.patch @@ -0,0 +1,46 @@ +From 72af21fc42ef8882ffa2feba3712117b178707c0 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 5 Aug 2013 20:05:12 -0700 +Subject: tcp: cubic: fix bug in bictcp_acked() + +From: Eric Dumazet + +[ Upstream commit cd6b423afd3c08b27e1fed52db828ade0addbc6b ] + +While investigating about strange increase of retransmit rates +on hosts ~24 days after boot, Van found hystart was disabled +if ca->epoch_start was 0, as following condition is true +when tcp_time_stamp high order bit is set. + +(s32)(tcp_time_stamp - ca->epoch_start) < HZ + +Quoting Van : + + At initialization & after every loss ca->epoch_start is set to zero so + I believe that the above line will turn off hystart as soon as the 2^31 + bit is set in tcp_time_stamp & hystart will stay off for 24 days. + I think we've observed that cubic's restart is too aggressive without + hystart so this might account for the higher drop rate we observe. + +Diagnosed-by: Van Jacobson +Signed-off-by: Eric Dumazet +Cc: Neal Cardwell +Cc: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_cubic.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_cubic.c ++++ b/net/ipv4/tcp_cubic.c +@@ -414,7 +414,7 @@ static void bictcp_acked(struct sock *sk + return; + + /* Discard delay samples right after fast recovery */ +- if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) ++ if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) + return; + + delay = (rtt_us << 3) / USEC_PER_MSEC; diff --git a/queue-3.0/tcp-cubic-fix-overflow-error-in-bictcp_update.patch b/queue-3.0/tcp-cubic-fix-overflow-error-in-bictcp_update.patch new file mode 100644 index 00000000000..38defc17217 --- /dev/null +++ b/queue-3.0/tcp-cubic-fix-overflow-error-in-bictcp_update.patch @@ -0,0 +1,68 @@ +From 2a2d16df60d3c8d4ac6409026c120d38f00c9ea5 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 5 Aug 2013 17:10:15 -0700 +Subject: tcp: cubic: fix overflow error in bictcp_update() + +From: Eric Dumazet + +[ Upstream commit 2ed0edf9090bf4afa2c6fc4f38575a85a80d4b20 ] + +commit 17a6e9f1aa9 ("tcp_cubic: fix clock dependency") added an +overflow error in bictcp_update() in following code : + +/* change the unit from HZ to bictcp_HZ */ +t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - + ca->epoch_start) << BICTCP_HZ) / HZ; + +Because msecs_to_jiffies() being unsigned long, compiler does +implicit type promotion. + +We really want to constrain (tcp_time_stamp - ca->epoch_start) +to a signed 32bit value, or else 't' has unexpected high values. + +This bug triggers an increase of retransmit rates ~24 days after +boot [1], as the high order bit of tcp_time_stamp flips. + +[1] for hosts with HZ=1000 + +Big thanks to Van Jacobson for spotting this problem. + +Diagnosed-by: Van Jacobson +Signed-off-by: Eric Dumazet +Cc: Neal Cardwell +Cc: Yuchung Cheng +Cc: Stephen Hemminger +Acked-by: Neal Cardwell +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_cubic.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp_cubic.c ++++ b/net/ipv4/tcp_cubic.c +@@ -204,8 +204,8 @@ static u32 cubic_root(u64 a) + */ + static inline void bictcp_update(struct bictcp *ca, u32 cwnd) + { +- u64 offs; +- u32 delta, t, bic_target, max_cnt; ++ u32 delta, bic_target, max_cnt; ++ u64 offs, t; + + ca->ack_cnt++; /* count the number of ACKs */ + +@@ -248,9 +248,11 @@ static inline void bictcp_update(struct + * if the cwnd < 1 million packets !!! + */ + ++ t = (s32)(tcp_time_stamp - ca->epoch_start); ++ t += msecs_to_jiffies(ca->delay_min >> 3); + /* change the unit from HZ to bictcp_HZ */ +- t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) +- - ca->epoch_start) << BICTCP_HZ) / HZ; ++ t <<= BICTCP_HZ; ++ do_div(t, HZ); + + if (t < ca->bic_K) /* t - K */ + offs = ca->bic_K - t; diff --git a/queue-3.0/tipc-fix-lockdep-warning-during-bearer-initialization.patch b/queue-3.0/tipc-fix-lockdep-warning-during-bearer-initialization.patch new file mode 100644 index 00000000000..b870c8e5a65 --- /dev/null +++ b/queue-3.0/tipc-fix-lockdep-warning-during-bearer-initialization.patch @@ -0,0 +1,175 @@ +From f023235029429ba54960f51dc46ea98dfca16a9b Mon Sep 17 00:00:00 2001 +From: Ying Xue +Date: Thu, 16 Aug 2012 12:09:07 +0000 +Subject: tipc: fix lockdep warning during bearer initialization + +From: Ying Xue + +[ Upstream commit 4225a398c1352a7a5c14dc07277cb5cc4473983b ] + +When the lockdep validator is enabled, it will report the below +warning when we enable a TIPC bearer: + +[ INFO: possible irq lock inversion dependency detected ] +--------------------------------------------------------- +Possible interrupt unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(ptype_lock); + local_irq_disable(); + lock(tipc_net_lock); + lock(ptype_lock); + + lock(tipc_net_lock); + + *** DEADLOCK *** + +the shortest dependencies between 2nd lock and 1st lock: + 
-> (ptype_lock){+.+...} ops: 10 { +[...] +SOFTIRQ-ON-W at: + [] __lock_acquire+0x528/0x13e0 + [] lock_acquire+0x90/0x100 + [] _raw_spin_lock+0x38/0x50 + [] dev_add_pack+0x3a/0x60 + [] arp_init+0x1a/0x48 + [] inet_init+0x181/0x27e + [] do_one_initcall+0x34/0x170 + [] kernel_init+0x110/0x1b2 + [] kernel_thread_helper+0x6/0x10 +[...] + ... key at: [] ptype_lock+0x10/0x20 + ... acquired at: + [] lock_acquire+0x90/0x100 + [] _raw_spin_lock+0x38/0x50 + [] dev_add_pack+0x3a/0x60 + [] enable_bearer+0xf2/0x140 [tipc] + [] tipc_enable_bearer+0x1ba/0x450 [tipc] + [] tipc_cfg_do_cmd+0x5c4/0x830 [tipc] + [] handle_cmd+0x42/0xd0 [tipc] + [] genl_rcv_msg+0x232/0x280 + [] netlink_rcv_skb+0x86/0xb0 + [] genl_rcv+0x1c/0x30 + [] netlink_unicast+0x174/0x1f0 + [] netlink_sendmsg+0x1eb/0x2d0 + [] sock_aio_write+0x161/0x170 + [] do_sync_write+0xac/0xf0 + [] vfs_write+0x156/0x170 + [] sys_write+0x42/0x70 + [] sysenter_do_call+0x12/0x38 +[...] +} + -> (tipc_net_lock){+..-..} ops: 4 { +[...] + IN-SOFTIRQ-R at: + [] __lock_acquire+0x64a/0x13e0 + [] lock_acquire+0x90/0x100 + [] _raw_read_lock_bh+0x3d/0x50 + [] tipc_recv_msg+0x1d/0x830 [tipc] + [] recv_msg+0x3f/0x50 [tipc] + [] __netif_receive_skb+0x22a/0x590 + [] netif_receive_skb+0x2b/0xf0 + [] pcnet32_poll+0x292/0x780 + [] net_rx_action+0xfa/0x1e0 + [] __do_softirq+0xae/0x1e0 +[...] +} + +>From the log, we can see three different call chains between +CPU0 and CPU1: + +Time 0 on CPU0: + + kernel_init()->inet_init()->dev_add_pack() + +At time 0, the ptype_lock is held by CPU0 in dev_add_pack(); + +Time 1 on CPU1: + + tipc_enable_bearer()->enable_bearer()->dev_add_pack() + +At time 1, tipc_enable_bearer() first holds tipc_net_lock, and then +wants to take ptype_lock to register TIPC protocol handler into the +networking stack. But the ptype_lock has been taken by dev_add_pack() +on CPU0, so at this time the dev_add_pack() running on CPU1 has to be +busy looping. 
+ +Time 2 on CPU0: + + netif_receive_skb()->recv_msg()->tipc_recv_msg() + +At time 2, an incoming TIPC packet arrives at CPU0, hence +tipc_recv_msg() will be invoked. In tipc_recv_msg(), it first wants +to hold tipc_net_lock. At the moment, below scenario happens: + +On CPU0, below is our sequence of taking locks: + + lock(ptype_lock)->lock(tipc_net_lock) + +On CPU1, our sequence of taking locks looks like: + + lock(tipc_net_lock)->lock(ptype_lock) + +Obviously deadlock may happen in this case. + +But please note the deadlock possibly doesn't occur at all when the +first TIPC bearer is enabled. Before enable_bearer() -- running on +CPU1 does not hold ptype_lock, so the TIPC receive handler (i.e. +recv_msg()) is not registered successfully via dev_add_pack(), so +the tipc_recv_msg() cannot be called by recv_msg() even if a TIPC +message comes to CPU0. But when the second TIPC bearer is +registered, the deadlock can perhaps really happen. + +To fix it, we will push the work of registering TIPC protocol +handler into workqueue context. After the change, both paths taking +ptype_lock are always in process contexts, thus, the deadlock should +never occur. + +Signed-off-by: Ying Xue +Signed-off-by: Jon Maloy +Signed-off-by: Paul Gortmaker +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/eth_media.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/net/tipc/eth_media.c ++++ b/net/tipc/eth_media.c +@@ -53,6 +53,7 @@ struct eth_bearer { + struct tipc_bearer *bearer; + struct net_device *dev; + struct packet_type tipc_packet_type; ++ struct work_struct setup; + }; + + static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; +@@ -121,6 +122,17 @@ static int recv_msg(struct sk_buff *buf, + } + + /** ++ * setup_bearer - setup association between Ethernet bearer and interface ++ */ ++static void setup_bearer(struct work_struct *work) ++{ ++ struct eth_bearer *eb_ptr = ++ container_of(work, struct eth_bearer, setup); ++ ++ dev_add_pack(&eb_ptr->tipc_packet_type); ++} ++ ++/** + * enable_bearer - attach TIPC bearer to an Ethernet interface + */ + +@@ -167,7 +179,8 @@ static int enable_bearer(struct tipc_bea + eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; + INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); + dev_hold(dev); +- dev_add_pack(&eb_ptr->tipc_packet_type); ++ INIT_WORK(&eb_ptr->setup, setup_bearer); ++ schedule_work(&eb_ptr->setup); + } + + /* Associate TIPC bearer with Ethernet bearer */ diff --git a/queue-3.0/tun-signedness-bug-in-tun_get_user.patch b/queue-3.0/tun-signedness-bug-in-tun_get_user.patch new file mode 100644 index 00000000000..bc378398aea --- /dev/null +++ b/queue-3.0/tun-signedness-bug-in-tun_get_user.patch @@ -0,0 +1,46 @@ +From ebc6bacdc6d7bcf3fcf074e0aa5dd2f2fb2ff70a Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Thu, 15 Aug 2013 15:52:57 +0300 +Subject: tun: signedness bug in tun_get_user() + +From: Dan Carpenter + +[ Upstream commit 15718ea0d844e4816dbd95d57a8a0e3e264ba90e ] + +The recent fix d9bf5f1309 "tun: compare with 0 instead of total_len" is +not totally correct. Because "len" and "sizeof()" are size_t type, that +means they are never less than zero. + +Signed-off-by: Dan Carpenter +Acked-by: Michael S. 
Tsirkin +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -614,8 +614,9 @@ static __inline__ ssize_t tun_get_user(s + int offset = 0; + + if (!(tun->flags & TUN_NO_PI)) { +- if ((len -= sizeof(pi)) > count) ++ if (len < sizeof(pi)) + return -EINVAL; ++ len -= sizeof(pi); + + if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi))) + return -EFAULT; +@@ -623,8 +624,9 @@ static __inline__ ssize_t tun_get_user(s + } + + if (tun->flags & TUN_VNET_HDR) { +- if ((len -= tun->vnet_hdr_sz) > count) ++ if (len < tun->vnet_hdr_sz) + return -EINVAL; ++ len -= tun->vnet_hdr_sz; + + if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso))) + return -EFAULT;