3.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 11 Sep 2013 16:13:50 +0000 (09:13 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 11 Sep 2013 16:13:50 +0000 (09:13 -0700)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 11 Sep 2013 16:13:50 +0000 (09:13 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 11 Sep 2013 16:13:50 +0000 (09:13 -0700)
diff --git a/queue-3.4/8139cp-fix-skb-leak-in-rx_status_loop-failure-path.patch b/queue-3.4/8139cp-fix-skb-leak-in-rx_status_loop-failure-path.patch

new file mode 100644 (file)

index 0000000..0ead4b0
--- /dev/null
+++ b/queue-3.4/8139cp-fix-skb-leak-in-rx_status_loop-failure-path.patch
@@ -0,0 +1,29 @@
+From b2fb347f0dae2ffea9234d3c6b4fd6ad4b75fe81 Mon Sep 17 00:00:00 2001
+From: Dave Jones <davej@redhat.com>
+Date: Fri, 9 Aug 2013 11:16:34 -0700
+Subject: 8139cp: Fix skb leak in rx_status_loop failure path.
+
+From: Dave Jones <davej@redhat.com>
+
+[ Upstream commit d06f5187469eee1b2932c02fd093d113cfc60d5e ]
+
+Introduced in cf3c4c03060b688cbc389ebc5065ebcce5653e96
+("8139cp: Add dma_mapping_error checking")
+
+Signed-off-by: Dave Jones <davej@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/8139cp.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/realtek/8139cp.c
++++ b/drivers/net/ethernet/realtek/8139cp.c
+@@ -524,6 +524,7 @@ rx_status_loop:
+                                        PCI_DMA_FROMDEVICE);
+               if (dma_mapping_error(&cp->pdev->dev, new_mapping)) {
+                       dev->stats.rx_dropped++;
++                      kfree_skb(new_skb);
+                       goto rx_next;
+               }
+ 
diff --git a/queue-3.4/af_key-initialize-satype-in-key_notify_policy_flush.patch b/queue-3.4/af_key-initialize-satype-in-key_notify_policy_flush.patch

new file mode 100644 (file)

index 0000000..004bd14
--- /dev/null
+++ b/queue-3.4/af_key-initialize-satype-in-key_notify_policy_flush.patch
@@ -0,0 +1,29 @@
+From 930e232cc73bdf918a0896ffc458902ab8897a88 Mon Sep 17 00:00:00 2001
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Mon, 18 Feb 2013 16:24:20 +0100
+Subject: af_key: initialize satype in key_notify_policy_flush()
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 85dfb745ee40232876663ae206cba35f24ab2a40 ]
+
+This field was left uninitialized. Some user daemons perform check against this
+field.
+
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/key/af_key.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/key/af_key.c
++++ b/net/key/af_key.c
+@@ -1704,6 +1704,7 @@ static int key_notify_sa_flush(const str
+       hdr->sadb_msg_pid = c->pid;
+       hdr->sadb_msg_version = PF_KEY_V2;
+       hdr->sadb_msg_errno = (uint8_t) 0;
++      hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
+       hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+       hdr->sadb_msg_reserved = 0;
+ 
diff --git a/queue-3.4/bonding-modify-only-neigh_parms-owned-by-us.patch b/queue-3.4/bonding-modify-only-neigh_parms-owned-by-us.patch

new file mode 100644 (file)

index 0000000..cce2f8d
--- /dev/null
+++ b/queue-3.4/bonding-modify-only-neigh_parms-owned-by-us.patch
@@ -0,0 +1,40 @@
+From f2c884d19bed59210a90449397ea9d34de0240ba Mon Sep 17 00:00:00 2001
+From: Veaceslav Falico <vfalico@redhat.com>
+Date: Fri, 2 Aug 2013 19:07:39 +0200
+Subject: bonding: modify only neigh_parms owned by us
+
+From: Veaceslav Falico <vfalico@redhat.com>
+
+[ Upstream commit 9918d5bf329d0dc5bb2d9d293bcb772bdb626e65 ]
+
+Otherwise, on neighbour creation, bond_neigh_init() will be called with a
+foreign netdev.
+
+Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -3750,11 +3750,17 @@ static int bond_neigh_init(struct neighb
+  * The bonding ndo_neigh_setup is called at init time beofre any
+  * slave exists. So we must declare proxy setup function which will
+  * be used at run time to resolve the actual slave neigh param setup.
++ *
++ * It's also called by master devices (such as vlans) to setup their
++ * underlying devices. In that case - do nothing, we're already set up from
++ * our init.
+  */
+ static int bond_neigh_setup(struct net_device *dev,
+                           struct neigh_parms *parms)
+ {
+-      parms->neigh_setup   = bond_neigh_init;
++      /* modify only our neigh_parms */
++      if (parms->dev == dev)
++              parms->neigh_setup = bond_neigh_init;
+ 
+       return 0;
+ }
diff --git a/queue-3.4/fib_trie-remove-potential-out-of-bound-access.patch b/queue-3.4/fib_trie-remove-potential-out-of-bound-access.patch

new file mode 100644 (file)

index 0000000..0163ea4
--- /dev/null
+++ b/queue-3.4/fib_trie-remove-potential-out-of-bound-access.patch
@@ -0,0 +1,51 @@
+From 28561e4df3d4d37ea5dbf01cd8623a5fe2dbb369 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 5 Aug 2013 11:18:49 -0700
+Subject: fib_trie: remove potential out of bound access
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit aab515d7c32a34300312416c50314e755ea6f765 ]
+
+AddressSanitizer [1] dynamic checker pointed a potential
+out of bound access in leaf_walk_rcu()
+
+We could allocate one more slot in tnode_new() to leave the prefetch()
+in-place but it looks not worth the pain.
+
+Bug added in commit 82cfbb008572b ("[IPV4] fib_trie: iterator recode")
+
+[1] :
+https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel
+
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_trie.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -71,7 +71,6 @@
+ #include <linux/init.h>
+ #include <linux/list.h>
+ #include <linux/slab.h>
+-#include <linux/prefetch.h>
+ #include <linux/export.h>
+ #include <net/net_namespace.h>
+ #include <net/ip.h>
+@@ -1772,10 +1771,8 @@ static struct leaf *leaf_walk_rcu(struct
+                       if (!c)
+                               continue;
+ 
+-                      if (IS_LEAF(c)) {
+-                              prefetch(rcu_dereference_rtnl(p->child[idx]));
++                      if (IS_LEAF(c))
+                               return (struct leaf *) c;
+-                      }
+ 
+                       /* Rescan start scanning in new node */
+                       p = (struct tnode *) c;
diff --git a/queue-3.4/htb-fix-sign-extension-bug.patch b/queue-3.4/htb-fix-sign-extension-bug.patch

new file mode 100644 (file)

index 0000000..02472c0
--- /dev/null
+++ b/queue-3.4/htb-fix-sign-extension-bug.patch
@@ -0,0 +1,39 @@
+From 470ca701447611fa3276dec1994b2be6fb3e3746 Mon Sep 17 00:00:00 2001
+From: stephen hemminger <stephen@networkplumber.org>
+Date: Thu, 1 Aug 2013 22:32:07 -0700
+Subject: htb: fix sign extension bug
+
+From: stephen hemminger <stephen@networkplumber.org>
+
+[ Upstream commit cbd375567f7e4811b1c721f75ec519828ac6583f ]
+
+When userspace passes a large priority value
+the assignment of the unsigned value hopt->prio
+to signed int cl->prio causes cl->prio to become negative and the
+comparison with TC_HTB_NUMPRIO is always false.
+
+The result is that HTB crashes by referencing outside
+the array when processing packets. With this patch the large value
+wraps around like other values outside the normal range.
+
+See: https://bugzilla.kernel.org/show_bug.cgi?id=60669
+
+Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_htb.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -86,7 +86,7 @@ struct htb_class {
+       unsigned int children;
+       struct htb_class *parent;       /* parent class */
+ 
+-      int prio;               /* these two are used only by leaves... */
++      u32 prio;               /* these two are used only by leaves... */
+       int quantum;            /* but stored for parent-to-leaf return */
+ 
+       union {
diff --git a/queue-3.4/icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch b/queue-3.4/icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch

new file mode 100644 (file)

index 0000000..a149be7
--- /dev/null
+++ b/queue-3.4/icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch
@@ -0,0 +1,72 @@
+From a2681be0cbd9c7152cecd6c9a1ab5bfaa10e2471 Mon Sep 17 00:00:00 2001
+From: Jiri Bohac <jbohac@suse.cz>
+Date: Fri, 30 Aug 2013 11:18:45 +0200
+Subject: ICMPv6: treat dest unreachable codes 5 and 6 as EACCES, not EPROTO
+
+From: Jiri Bohac <jbohac@suse.cz>
+
+[ Upstream commit 61e76b178dbe7145e8d6afa84bb4ccea71918994 ]
+
+RFC 4443 has defined two additional codes for ICMPv6 type 1 (destination
+unreachable) messages:
+        5 - Source address failed ingress/egress policy
+       6 - Reject route to destination
+
+Now they are treated as protocol error and icmpv6_err_convert() converts them
+to EPROTO.
+
+RFC 4443 says:
+       "Codes 5 and 6 are more informative subsets of code 1."
+
+Treat codes 5 and 6 as code 1 (EACCES)
+
+Btw, connect() returning -EPROTO confuses firefox, so that fallback to
+other/IPv4 addresses does not work:
+https://bugzilla.mozilla.org/show_bug.cgi?id=910773
+
+Signed-off-by: Jiri Bohac <jbohac@suse.cz>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/icmpv6.h |    2 ++
+ net/ipv6/icmp.c        |   10 +++++++++-
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+--- a/include/linux/icmpv6.h
++++ b/include/linux/icmpv6.h
+@@ -123,6 +123,8 @@ static inline struct icmp6hdr *icmp6_hdr
+ #define ICMPV6_NOT_NEIGHBOUR          2
+ #define ICMPV6_ADDR_UNREACH           3
+ #define ICMPV6_PORT_UNREACH           4
++#define ICMPV6_POLICY_FAIL            5
++#define ICMPV6_REJECT_ROUTE           6
+ 
+ /*
+  *    Codes for Time Exceeded
+--- a/net/ipv6/icmp.c
++++ b/net/ipv6/icmp.c
+@@ -917,6 +917,14 @@ static const struct icmp6_err {
+               .err    = ECONNREFUSED,
+               .fatal  = 1,
+       },
++      {       /* POLICY_FAIL */
++              .err    = EACCES,
++              .fatal  = 1,
++      },
++      {       /* REJECT_ROUTE */
++              .err    = EACCES,
++              .fatal  = 1,
++      },
+ };
+ 
+ int icmpv6_err_convert(u8 type, u8 code, int *err)
+@@ -928,7 +936,7 @@ int icmpv6_err_convert(u8 type, u8 code,
+       switch (type) {
+       case ICMPV6_DEST_UNREACH:
+               fatal = 1;
+-              if (code <= ICMPV6_PORT_UNREACH) {
++              if (code < ARRAY_SIZE(tab_unreach)) {
+                       *err  = tab_unreach[code].err;
+                       fatal = tab_unreach[code].fatal;
+               }
diff --git a/queue-3.4/ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch b/queue-3.4/ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch

new file mode 100644 (file)

index 0000000..c73748e
--- /dev/null
+++ b/queue-3.4/ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch
@@ -0,0 +1,81 @@
+From b124cefb7fb9d7d58f1b7579bedfd8b8cdad2b11 Mon Sep 17 00:00:00 2001
+From: Thomas Graf <tgraf@suug.ch>
+Date: Tue, 3 Sep 2013 13:37:01 +0200
+Subject: ipv6: Don't depend on per socket memory for neighbour discovery messages
+
+From: Thomas Graf <tgraf@suug.ch>
+
+[ Upstream commit 25a6e6b84fba601eff7c28d30da8ad7cfbef0d43 ]
+
+Allocating skbs when sending out neighbour discovery messages
+currently uses sock_alloc_send_skb() based on a per net namespace
+socket and thus share a socket wmem buffer space.
+
+If a netdevice is temporarily unable to transmit due to carrier
+loss or for other reasons, the queued up ndisc messages will consume
+all of the wmem space and will thus prevent any more skbs from
+being allocated even for netdevices that are able to transmit packets.
+
+The number of neighbour discovery messages sent is very limited,
+use of alloc_skb() bypasses the socket wmem buffer size enforcement
+while the manual call to skb_set_owner_w() maintains the socket
+reference needed for the IPv6 output path.
+
+This patch has originally been posted by Eric Dumazet in a modified
+form.
+
+Signed-off-by: Thomas Graf <tgraf@suug.ch>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Cc: Stephen Warren <swarren@wwwdotorg.org>
+Cc: Fabio Estevam <festevam@gmail.com>
+Tested-by: Fabio Estevam <fabio.estevam@freescale.com>
+Tested-by: Stephen Warren <swarren@nvidia.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ndisc.c |   16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/net/ipv6/ndisc.c
++++ b/net/ipv6/ndisc.c
+@@ -441,7 +441,6 @@ struct sk_buff *ndisc_build_skb(struct n
+       int hlen = LL_RESERVED_SPACE(dev);
+       int tlen = dev->needed_tailroom;
+       int len;
+-      int err;
+       u8 *opt;
+ 
+       if (!dev->addr_len)
+@@ -451,14 +450,12 @@ struct sk_buff *ndisc_build_skb(struct n
+       if (llinfo)
+               len += ndisc_opt_addr_space(dev);
+ 
+-      skb = sock_alloc_send_skb(sk,
+-                                (MAX_HEADER + sizeof(struct ipv6hdr) +
+-                                 len + hlen + tlen),
+-                                1, &err);
++      skb = alloc_skb((MAX_HEADER + sizeof(struct ipv6hdr) +
++                       len + hlen + tlen), GFP_ATOMIC);
+       if (!skb) {
+               ND_PRINTK0(KERN_ERR
+-                         "ICMPv6 ND: %s() failed to allocate an skb, err=%d.\n",
+-                         __func__, err);
++                         "ICMPv6 ND: %s() failed to allocate an skb.\n",
++                         __func__);
+               return NULL;
+       }
+ 
+@@ -486,6 +483,11 @@ struct sk_buff *ndisc_build_skb(struct n
+                                          csum_partial(hdr,
+                                                       len, 0));
+ 
++      /* Manually assign socket ownership as we avoid calling
++       * sock_alloc_send_pskb() to bypass wmem buffer limits
++       */
++      skb_set_owner_w(skb, sk);
++
+       return skb;
+ }
+ 
diff --git a/queue-3.4/ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch b/queue-3.4/ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch

new file mode 100644 (file)

index 0000000..d2332bd
--- /dev/null
+++ b/queue-3.4/ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch
@@ -0,0 +1,58 @@
+From 2d8153827908cca60567ab7dd7abe92affca8823 Mon Sep 17 00:00:00 2001
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Wed, 7 Aug 2013 02:34:31 +0200
+Subject: ipv6: don't stop backtracking in fib6_lookup_1 if subtree does not match
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit 3e3be275851bc6fc90bfdcd732cd95563acd982b ]
+
+In case a subtree did not match we currently stop backtracking and return
+NULL (root table from fib_lookup). This could result in invalid routing
+table lookups when using subtrees.
+
+Instead continue to backtrack until a valid subtree or node is found
+and return this match.
+
+Also remove unneeded NULL check.
+
+Reported-by: Teco Boot <teco@inf-net.nl>
+Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+Cc: David Lamparter <equinox@diac24.net>
+Cc: <boutier@pps.univ-paris-diderot.fr>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c |   16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -949,14 +949,22 @@ static struct fib6_node * fib6_lookup_1(
+ 
+                       if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
+ #ifdef CONFIG_IPV6_SUBTREES
+-                              if (fn->subtree)
+-                                      fn = fib6_lookup_1(fn->subtree, args + 1);
++                              if (fn->subtree) {
++                                      struct fib6_node *sfn;
++                                      sfn = fib6_lookup_1(fn->subtree,
++                                                          args + 1);
++                                      if (!sfn)
++                                              goto backtrack;
++                                      fn = sfn;
++                              }
+ #endif
+-                              if (!fn || fn->fn_flags & RTN_RTINFO)
++                              if (fn->fn_flags & RTN_RTINFO)
+                                       return fn;
+                       }
+               }
+-
++#ifdef CONFIG_IPV6_SUBTREES
++backtrack:
++#endif
+               if (fn->fn_flags & RTN_ROOT)
+                       break;
+ 
diff --git a/queue-3.4/ipv6-drop-packets-with-multiple-fragmentation-headers.patch b/queue-3.4/ipv6-drop-packets-with-multiple-fragmentation-headers.patch

new file mode 100644 (file)

index 0000000..c189b66
--- /dev/null
+++ b/queue-3.4/ipv6-drop-packets-with-multiple-fragmentation-headers.patch
@@ -0,0 +1,63 @@
+From 301d2f6834afe6f4049b9193a85b05bbb65ffb6a Mon Sep 17 00:00:00 2001
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Fri, 16 Aug 2013 13:30:07 +0200
+Subject: ipv6: drop packets with multiple fragmentation headers
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit f46078cfcd77fa5165bf849f5e568a7ac5fa569c ]
+
+It is not allowed for an ipv6 packet to contain multiple fragmentation
+headers. So discard packets which were already reassembled by
+fragmentation logic and send back a parameter problem icmp.
+
+The updates for RFC 6980 will come in later, I have to do a bit more
+research here.
+
+Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ipv6.h  |    1 +
+ net/ipv6/reassembly.c |    5 +++++
+ 2 files changed, 6 insertions(+)
+
+--- a/include/linux/ipv6.h
++++ b/include/linux/ipv6.h
+@@ -260,6 +260,7 @@ struct inet6_skb_parm {
+ #define IP6SKB_XFRM_TRANSFORMED       1
+ #define IP6SKB_FORWARDED      2
+ #define IP6SKB_REROUTED               4
++#define IP6SKB_FRAGMENTED      16
+ };
+ 
+ #define IP6CB(skb)    ((struct inet6_skb_parm*)((skb)->cb))
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -516,6 +516,7 @@ static int ip6_frag_reasm(struct frag_qu
+       head->tstamp = fq->q.stamp;
+       ipv6_hdr(head)->payload_len = htons(payload_len);
+       IP6CB(head)->nhoff = nhoff;
++      IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
+ 
+       /* Yes, and fold redundant checksum back. 8) */
+       if (head->ip_summed == CHECKSUM_COMPLETE)
+@@ -551,6 +552,9 @@ static int ipv6_frag_rcv(struct sk_buff
+       const struct ipv6hdr *hdr = ipv6_hdr(skb);
+       struct net *net = dev_net(skb_dst(skb)->dev);
+ 
++      if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
++              goto fail_hdr;
++
+       IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
+ 
+       /* Jumbo payload inhibits frag. header */
+@@ -571,6 +575,7 @@ static int ipv6_frag_rcv(struct sk_buff
+                                ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
+ 
+               IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
++              IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+               return 1;
+       }
+ 
diff --git a/queue-3.4/ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch b/queue-3.4/ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch

new file mode 100644 (file)

index 0000000..30143fd
--- /dev/null
+++ b/queue-3.4/ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch
@@ -0,0 +1,64 @@
+From 79ada7773990ac5e464479790e8b4dc8ab0d48ac Mon Sep 17 00:00:00 2001
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Fri, 16 Aug 2013 13:02:27 +0200
+Subject: ipv6: remove max_addresses check from ipv6_create_tempaddr
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit 4b08a8f1bd8cb4541c93ec170027b4d0782dab52 ]
+
+Because of the max_addresses check attackers were able to disable privacy
+extensions on an interface by creating enough autoconfigured addresses:
+
+<http://seclists.org/oss-sec/2012/q4/292>
+
+But the check is not actually needed: max_addresses protects the
+kernel from installing too many ipv6 addresses on an interface and keeps
+addrconf_prefix_rcv from installing further addresses as soon as this limit
+is reached. We only generate temporary addresses in direct response of
+a new address showing up. As soon as we filled up the maximum number of
+addresses of an interface, we stop installing more addresses and thus
+also stop generating more temp addresses.
+
+Even if the attacker tries to generate a lot of temporary addresses
+by announcing a prefix and removing it again (lifetime == 0) we won't
+install more temp addresses, because the temporary addresses do count
+to the maximum number of addresses, thus we would stop installing new
+autoconfigured addresses when the limit is reached.
+
+This patch fixes CVE-2013-0343 (but other layer-2 attacks are still
+possible).
+
+Thanks to Ding Tianhong for bringing this topic up again.
+
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Cc: Ding Tianhong <dingtianhong@huawei.com>
+Cc: George Kargiotakis <kargig@void.gr>
+Cc: P J P <ppandit@redhat.com>
+Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+Acked-by: Ding Tianhong <dingtianhong@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -910,12 +910,10 @@ retry:
+       if (ifp->flags & IFA_F_OPTIMISTIC)
+               addr_flags |= IFA_F_OPTIMISTIC;
+ 
+-      ift = !max_addresses ||
+-            ipv6_count_addresses(idev) < max_addresses ?
+-              ipv6_add_addr(idev, &addr, tmp_plen,
+-                            ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+-                            addr_flags) : NULL;
+-      if (!ift || IS_ERR(ift)) {
++      ift = ipv6_add_addr(idev, &addr, tmp_plen,
++                          ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
++                          addr_flags);
++      if (IS_ERR(ift)) {
+               in6_ifa_put(ifp);
+               in6_dev_put(idev);
+               printk(KERN_INFO
diff --git a/queue-3.4/macvtap-do-not-zerocopy-if-iov-needs-more-pages-than-max_skb_frags.patch b/queue-3.4/macvtap-do-not-zerocopy-if-iov-needs-more-pages-than-max_skb_frags.patch

new file mode 100644 (file)

index 0000000..4c85bf2
--- /dev/null
+++ b/queue-3.4/macvtap-do-not-zerocopy-if-iov-needs-more-pages-than-max_skb_frags.patch
@@ -0,0 +1,123 @@
+From 11613badc2b7ff4c08b8503ea2580d70117b995a Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Tue, 6 Aug 2013 17:29:19 +0800
+Subject: macvtap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS
+
+From: Jason Wang <jasowang@redhat.com>
+
+commit ece793fcfc417b3925844be88a6a6dc82ae8f7c6 upstream.
+
+We try to linearize part of the skb when the number of iov is greater than
+MAX_SKB_FRAGS. This is not enough since each single vector may occupy more than
+one page, so zerocopy_sg_from_iovec() may still fail and may break the guest
+network.
+
+Solve this problem by calculating the pages needed for iov before trying to do
+zerocopy and switching to copy instead of zerocopy if it needs more than
+MAX_SKB_FRAGS.
+
+This is done through introducing a new helper to count the pages for iov, and
+call uarg->callback() manually when switching from zerocopy to copy to notify
+vhost.
+
+We can do further optimization on top.
+
+This bug was introduced by b92946e2919134ebe2a4083e4302236295ea2a73
+(macvtap: zerocopy: validate vectors before building skb).
+
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvtap.c |   62 +++++++++++++++++++++++++++++---------------------
+ 1 file changed, 37 insertions(+), 25 deletions(-)
+
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -642,6 +642,28 @@ static int macvtap_skb_to_vnet_hdr(const
+       return 0;
+ }
+ 
++static unsigned long iov_pages(const struct iovec *iv, int offset,
++                             unsigned long nr_segs)
++{
++      unsigned long seg, base;
++      int pages = 0, len, size;
++
++      while (nr_segs && (offset >= iv->iov_len)) {
++              offset -= iv->iov_len;
++              ++iv;
++              --nr_segs;
++      }
++
++      for (seg = 0; seg < nr_segs; seg++) {
++              base = (unsigned long)iv[seg].iov_base + offset;
++              len = iv[seg].iov_len - offset;
++              size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
++              pages += size;
++              offset = 0;
++      }
++
++      return pages;
++}
+ 
+ /* Get packet from user space buffer */
+ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
+@@ -688,31 +710,15 @@ static ssize_t macvtap_get_user(struct m
+       if (unlikely(count > UIO_MAXIOV))
+               goto err;
+ 
+-      if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY))
+-              zerocopy = true;
+-
+-      if (zerocopy) {
+-              /* Userspace may produce vectors with count greater than
+-               * MAX_SKB_FRAGS, so we need to linearize parts of the skb
+-               * to let the rest of data to be fit in the frags.
+-               */
+-              if (count > MAX_SKB_FRAGS) {
+-                      copylen = iov_length(iv, count - MAX_SKB_FRAGS);
+-                      if (copylen < vnet_hdr_len)
+-                              copylen = 0;
+-                      else
+-                              copylen -= vnet_hdr_len;
+-              }
+-              /* There are 256 bytes to be copied in skb, so there is enough
+-               * room for skb expand head in case it is used.
+-               * The rest buffer is mapped from userspace.
+-               */
+-              if (copylen < vnet_hdr.hdr_len)
+-                      copylen = vnet_hdr.hdr_len;
+-              if (!copylen)
+-                      copylen = GOODCOPY_LEN;
++      if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
++              copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN;
+               linear = copylen;
+-      } else {
++              if (iov_pages(iv, vnet_hdr_len + copylen, count)
++                  <= MAX_SKB_FRAGS)
++                      zerocopy = true;
++      }
++
++      if (!zerocopy) {
+               copylen = len;
+               linear = vnet_hdr.hdr_len;
+       }
+@@ -724,9 +730,15 @@ static ssize_t macvtap_get_user(struct m
+ 
+       if (zerocopy)
+               err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
+-      else
++      else {
+               err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
+                                                  len);
++              if (!err && m && m->msg_control) {
++                      struct ubuf_info *uarg = m->msg_control;
++                      uarg->callback(uarg);
++              }
++      }
++
+       if (err)
+               goto err_kfree;
+ 
diff --git a/queue-3.4/neighbour-populate-neigh_parms-on-alloc-before-calling-ndo_neigh_setup.patch b/queue-3.4/neighbour-populate-neigh_parms-on-alloc-before-calling-ndo_neigh_setup.patch

new file mode 100644 (file)

index 0000000..f72dcc9
--- /dev/null
+++ b/queue-3.4/neighbour-populate-neigh_parms-on-alloc-before-calling-ndo_neigh_setup.patch
@@ -0,0 +1,44 @@
+From 864c43185acc3b0cb04dd2d15d4f1a5a8ff6a557 Mon Sep 17 00:00:00 2001
+From: Veaceslav Falico <vfalico@redhat.com>
+Date: Fri, 2 Aug 2013 19:07:38 +0200
+Subject: neighbour: populate neigh_parms on alloc before calling ndo_neigh_setup
+
+From: Veaceslav Falico <vfalico@redhat.com>
+
+[ Upstream commit 63134803a6369dcf7dddf7f0d5e37b9566b308d2 ]
+
+dev->ndo_neigh_setup() might need some of the values of neigh_parms, so
+populate them before calling it.
+
+Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/neighbour.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -1442,16 +1442,18 @@ struct neigh_parms *neigh_parms_alloc(st
+               atomic_set(&p->refcnt, 1);
+               p->reachable_time =
+                               neigh_rand_reach_time(p->base_reachable_time);
++              dev_hold(dev);
++              p->dev = dev;
++              write_pnet(&p->net, hold_net(net));
++              p->sysctl_table = NULL;
+ 
+               if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
++                      release_net(net);
++                      dev_put(dev);
+                       kfree(p);
+                       return NULL;
+               }
+ 
+-              dev_hold(dev);
+-              p->dev = dev;
+-              write_pnet(&p->net, hold_net(net));
+-              p->sysctl_table = NULL;
+               write_lock_bh(&tbl->lock);
+               p->next         = tbl->parms.next;
+               tbl->parms.next = p;
diff --git a/queue-3.4/net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch b/queue-3.4/net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch

new file mode 100644 (file)

index 0000000..df0aed0
--- /dev/null
+++ b/queue-3.4/net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch
@@ -0,0 +1,43 @@
+From 7a46c6dcf91c3025160c9163f5b5a82400c3c078 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Thu, 29 Aug 2013 23:55:05 +0200
+Subject: net: bridge: convert MLDv2 Query MRC into msecs_to_jiffies for max_delay
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit 2d98c29b6fb3de44d9eaa73c09f9cf7209346383 ]
+
+While looking into MLDv1/v2 code, I noticed that bridging code does
+not convert its max delay into jiffies for MLDv2 messages as we do
+in core IPv6' multicast code.
+
+RFC3810, 5.1.3. Maximum Response Code says:
+
+  The Maximum Response Code field specifies the maximum time allowed
+  before sending a responding Report. The actual time allowed, called
+  the Maximum Response Delay, is represented in units of milliseconds,
+  and is derived from the Maximum Response Code as follows: [...]
+
+As we update timers that work with jiffies, we need to convert it.
+
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Cc: Linus Lüssing <linus.luessing@web.de>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_multicast.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -1155,7 +1155,8 @@ static int br_ip6_multicast_query(struct
+               mld2q = (struct mld2_query *)icmp6_hdr(skb);
+               if (!mld2q->mld2q_nsrcs)
+                       group = &mld2q->mld2q_mca;
+-              max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(mld2q->mld2q_mrc) : 1;
++
++              max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL);
+       }
+ 
+       if (!group)
diff --git a/queue-3.4/net-check-net.core.somaxconn-sysctl-values.patch b/queue-3.4/net-check-net.core.somaxconn-sysctl-values.patch

new file mode 100644 (file)

index 0000000..6e0205a
--- /dev/null
+++ b/queue-3.4/net-check-net.core.somaxconn-sysctl-values.patch
@@ -0,0 +1,70 @@
+From 52ab602137b1f0948243d36c6e393fd0643aaa96 Mon Sep 17 00:00:00 2001
+From: Roman Gushchin <klamm@yandex-team.ru>
+Date: Fri, 2 Aug 2013 18:36:40 +0400
+Subject: net: check net.core.somaxconn sysctl values
+
+From: Roman Gushchin <klamm@yandex-team.ru>
+
+[ Upstream commit 5f671d6b4ec3e6d66c2a868738af2cdea09e7509 ]
+
+It's possible to assign an invalid value to the net.core.somaxconn
+sysctl variable, because there are no checks at all.
+
+The sk_max_ack_backlog field of the sock structure is defined as
+unsigned short. Therefore, the backlog argument in inet_listen()
+shouldn't exceed USHRT_MAX. The backlog argument in the listen() syscall
+is truncated to the somaxconn value. So, the somaxconn value shouldn't
+exceed 65535 (USHRT_MAX).
+Also, negative values of somaxconn are meaningless.
+
+before:
+$ sysctl -w net.core.somaxconn=256
+net.core.somaxconn = 256
+$ sysctl -w net.core.somaxconn=65536
+net.core.somaxconn = 65536
+$ sysctl -w net.core.somaxconn=-100
+net.core.somaxconn = -100
+
+after:
+$ sysctl -w net.core.somaxconn=256
+net.core.somaxconn = 256
+$ sysctl -w net.core.somaxconn=65536
+error: "Invalid argument" setting key "net.core.somaxconn"
+$ sysctl -w net.core.somaxconn=-100
+error: "Invalid argument" setting key "net.core.somaxconn"
+
+Based on a prior patch from Changli Gao.
+
+Signed-off-by: Roman Gushchin <klamm@yandex-team.ru>
+Reported-by: Changli Gao <xiaosuo@gmail.com>
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sysctl_net_core.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -19,6 +19,9 @@
+ #include <net/sock.h>
+ #include <net/net_ratelimit.h>
+ 
++static int zero = 0;
++static int ushort_max = USHRT_MAX;
++
+ #ifdef CONFIG_RPS
+ static int rps_sock_flow_sysctl(ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos)
+@@ -197,7 +200,9 @@ static struct ctl_table netns_core_table
+               .data           = &init_net.core.sysctl_somaxconn,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-              .proc_handler   = proc_dointvec
++              .extra1         = &zero,
++              .extra2         = &ushort_max,
++              .proc_handler   = proc_dointvec_minmax
+       },
+       { }
+ };
diff --git a/queue-3.4/net-ipv6-tcp-fix-potential-use-after-free-in-tcp_v6_do_rcv.patch b/queue-3.4/net-ipv6-tcp-fix-potential-use-after-free-in-tcp_v6_do_rcv.patch

new file mode 100644 (file)

index 0000000..984eaf7
--- /dev/null
+++ b/queue-3.4/net-ipv6-tcp-fix-potential-use-after-free-in-tcp_v6_do_rcv.patch
@@ -0,0 +1,45 @@
+From c596c9f2530e3d305d2e4d9e3491df8fab08c97f Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Tue, 3 Sep 2013 19:29:12 +0200
+Subject: net: ipv6: tcp: fix potential use after free in tcp_v6_do_rcv
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit 3a1c756590633c0e86df606e5c618c190926a0df ]
+
+In tcp_v6_do_rcv() code, when processing pkt options, we solely work
+on our skb clone opt_skb that we've created earlier before entering
+tcp_rcv_established() on our way. However, only in condition ...
+
+  if (np->rxopt.bits.rxtclass)
+    np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+
+... we work on skb itself. As we extract every other information out
+of opt_skb in ipv6_pktoptions path, this seems wrong, since skb can
+already be released by tcp_rcv_established() earlier on. When we try
+to access it in ipv6_hdr(), we will dereference freed skb.
+
+[ Bug added by commit 4c507d2897bd9b ("net: implement IP_RECVTOS for
+  IP_PKTOPTIONS") ]
+
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Jiri Benc <jbenc@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/tcp_ipv6.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1571,7 +1571,7 @@ ipv6_pktoptions:
+               if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
+                       np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+               if (np->rxopt.bits.rxtclass)
+-                      np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
++                      np->rcv_tclass = ipv6_tclass(ipv6_hdr(opt_skb));
+               if (ipv6_opt_accepted(sk, opt_skb)) {
+                       skb_set_owner_r(opt_skb, sk);
+                       opt_skb = xchg(&np->pktoptions, opt_skb);
diff --git a/queue-3.4/series b/queue-3.4/series

new file mode 100644 (file)

index 0000000..1251e64
--- /dev/null
+++ b/queue-3.4/series
@@ -0,0 +1,20 @@
+htb-fix-sign-extension-bug.patch
+net-check-net.core.somaxconn-sysctl-values.patch
+neighbour-populate-neigh_parms-on-alloc-before-calling-ndo_neigh_setup.patch
+bonding-modify-only-neigh_parms-owned-by-us.patch
+fib_trie-remove-potential-out-of-bound-access.patch
+tcp-cubic-fix-overflow-error-in-bictcp_update.patch
+tcp-cubic-fix-bug-in-bictcp_acked.patch
+ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch
+8139cp-fix-skb-leak-in-rx_status_loop-failure-path.patch
+tun-signedness-bug-in-tun_get_user.patch
+ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch
+ipv6-drop-packets-with-multiple-fragmentation-headers.patch
+ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch
+net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch
+icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch
+net-ipv6-tcp-fix-potential-use-after-free-in-tcp_v6_do_rcv.patch
+vhost-zerocopy-poll-vq-in-zerocopy-callback.patch
+macvtap-do-not-zerocopy-if-iov-needs-more-pages-than-max_skb_frags.patch
+tipc-fix-lockdep-warning-during-bearer-initialization.patch
+af_key-initialize-satype-in-key_notify_policy_flush.patch
diff --git a/queue-3.4/tcp-cubic-fix-bug-in-bictcp_acked.patch b/queue-3.4/tcp-cubic-fix-bug-in-bictcp_acked.patch

new file mode 100644 (file)

index 0000000..bd89c19
--- /dev/null
+++ b/queue-3.4/tcp-cubic-fix-bug-in-bictcp_acked.patch
@@ -0,0 +1,46 @@
+From 6c9eced920a991497673accec4df3a17ca3ee1a4 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 5 Aug 2013 20:05:12 -0700
+Subject: tcp: cubic: fix bug in bictcp_acked()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit cd6b423afd3c08b27e1fed52db828ade0addbc6b ]
+
+While investigating a strange increase in retransmit rates
+on hosts ~24 days after boot, Van found hystart was disabled
+if ca->epoch_start was 0, as following condition is true
+when tcp_time_stamp high order bit is set.
+
+(s32)(tcp_time_stamp - ca->epoch_start) < HZ
+
+Quoting Van :
+
+ At initialization & after every loss ca->epoch_start is set to zero so
+ I believe that the above line will turn off hystart as soon as the 2^31
+ bit is set in tcp_time_stamp & hystart will stay off for 24 days.
+ I think we've observed that cubic's restart is too aggressive without
+ hystart so this might account for the higher drop rate we observe.
+
+Diagnosed-by: Van Jacobson <vanj@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_cubic.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_cubic.c
++++ b/net/ipv4/tcp_cubic.c
+@@ -416,7 +416,7 @@ static void bictcp_acked(struct sock *sk
+               return;
+ 
+       /* Discard delay samples right after fast recovery */
+-      if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
++      if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+               return;
+ 
+       delay = (rtt_us << 3) / USEC_PER_MSEC;
diff --git a/queue-3.4/tcp-cubic-fix-overflow-error-in-bictcp_update.patch b/queue-3.4/tcp-cubic-fix-overflow-error-in-bictcp_update.patch

new file mode 100644 (file)

index 0000000..a13b7c7
--- /dev/null
+++ b/queue-3.4/tcp-cubic-fix-overflow-error-in-bictcp_update.patch
@@ -0,0 +1,68 @@
+From 9b5d5463fad24e4487187d9bb64f03921f108aeb Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 5 Aug 2013 17:10:15 -0700
+Subject: tcp: cubic: fix overflow error in bictcp_update()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 2ed0edf9090bf4afa2c6fc4f38575a85a80d4b20 ]
+
+commit 17a6e9f1aa9 ("tcp_cubic: fix clock dependency") added an
+overflow error in bictcp_update() in following code :
+
+/* change the unit from HZ to bictcp_HZ */
+t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) -
+      ca->epoch_start) << BICTCP_HZ) / HZ;
+
+Because msecs_to_jiffies() being unsigned long, compiler does
+implicit type promotion.
+
+We really want to constrain (tcp_time_stamp - ca->epoch_start)
+to a signed 32bit value, or else 't' has unexpected high values.
+
+This bug triggers an increase of retransmit rates ~24 days after
+boot [1], as the high order bit of tcp_time_stamp flips.
+
+[1] for hosts with HZ=1000
+
+Big thanks to Van Jacobson for spotting this problem.
+
+Diagnosed-by: Van Jacobson <vanj@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Stephen Hemminger <stephen@networkplumber.org>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_cubic.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_cubic.c
++++ b/net/ipv4/tcp_cubic.c
+@@ -206,8 +206,8 @@ static u32 cubic_root(u64 a)
+  */
+ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
+ {
+-      u64 offs;
+-      u32 delta, t, bic_target, max_cnt;
++      u32 delta, bic_target, max_cnt;
++      u64 offs, t;
+ 
+       ca->ack_cnt++;  /* count the number of ACKs */
+ 
+@@ -250,9 +250,11 @@ static inline void bictcp_update(struct
+        * if the cwnd < 1 million packets !!!
+        */
+ 
++      t = (s32)(tcp_time_stamp - ca->epoch_start);
++      t += msecs_to_jiffies(ca->delay_min >> 3);
+       /* change the unit from HZ to bictcp_HZ */
+-      t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
+-            - ca->epoch_start) << BICTCP_HZ) / HZ;
++      t <<= BICTCP_HZ;
++      do_div(t, HZ);
+ 
+       if (t < ca->bic_K)              /* t - K */
+               offs = ca->bic_K - t;
diff --git a/queue-3.4/tipc-fix-lockdep-warning-during-bearer-initialization.patch b/queue-3.4/tipc-fix-lockdep-warning-during-bearer-initialization.patch

new file mode 100644 (file)

index 0000000..ef776a3
--- /dev/null
+++ b/queue-3.4/tipc-fix-lockdep-warning-during-bearer-initialization.patch
@@ -0,0 +1,175 @@
+From 9ee9730a92ab8f0bf0e2b3994a9be5fc82380b7c Mon Sep 17 00:00:00 2001
+From: Ying Xue <ying.xue@windriver.com>
+Date: Thu, 16 Aug 2012 12:09:07 +0000
+Subject: tipc: fix lockdep warning during bearer initialization
+
+From: Ying Xue <ying.xue@windriver.com>
+
+[ Upstream commit 4225a398c1352a7a5c14dc07277cb5cc4473983b ]
+
+When the lockdep validator is enabled, it will report the below
+warning when we enable a TIPC bearer:
+
+[ INFO: possible irq lock inversion dependency detected ]
+---------------------------------------------------------
+Possible interrupt unsafe locking scenario:
+
+        CPU0                    CPU1
+        ----                    ----
+   lock(ptype_lock);
+                                local_irq_disable();
+                                lock(tipc_net_lock);
+                                lock(ptype_lock);
+   <Interrupt>
+   lock(tipc_net_lock);
+
+  *** DEADLOCK ***
+
+the shortest dependencies between 2nd lock and 1st lock:
+  -> (ptype_lock){+.+...} ops: 10 {
+[...]
+SOFTIRQ-ON-W at:
+                      [<c1089418>] __lock_acquire+0x528/0x13e0
+                      [<c108a360>] lock_acquire+0x90/0x100
+                      [<c1553c38>] _raw_spin_lock+0x38/0x50
+                      [<c14651ca>] dev_add_pack+0x3a/0x60
+                      [<c182da75>] arp_init+0x1a/0x48
+                      [<c182dce5>] inet_init+0x181/0x27e
+                      [<c1001114>] do_one_initcall+0x34/0x170
+                      [<c17f7329>] kernel_init+0x110/0x1b2
+                      [<c155b6a2>] kernel_thread_helper+0x6/0x10
+[...]
+   ... key      at: [<c17e4b10>] ptype_lock+0x10/0x20
+   ... acquired at:
+    [<c108a360>] lock_acquire+0x90/0x100
+    [<c1553c38>] _raw_spin_lock+0x38/0x50
+    [<c14651ca>] dev_add_pack+0x3a/0x60
+    [<c8bc18d2>] enable_bearer+0xf2/0x140 [tipc]
+    [<c8bb283a>] tipc_enable_bearer+0x1ba/0x450 [tipc]
+    [<c8bb3a04>] tipc_cfg_do_cmd+0x5c4/0x830 [tipc]
+    [<c8bbc032>] handle_cmd+0x42/0xd0 [tipc]
+    [<c148e802>] genl_rcv_msg+0x232/0x280
+    [<c148d3f6>] netlink_rcv_skb+0x86/0xb0
+    [<c148e5bc>] genl_rcv+0x1c/0x30
+    [<c148d144>] netlink_unicast+0x174/0x1f0
+    [<c148ddab>] netlink_sendmsg+0x1eb/0x2d0
+    [<c1456bc1>] sock_aio_write+0x161/0x170
+    [<c1135a7c>] do_sync_write+0xac/0xf0
+    [<c11360f6>] vfs_write+0x156/0x170
+    [<c11361e2>] sys_write+0x42/0x70
+    [<c155b0df>] sysenter_do_call+0x12/0x38
+[...]
+}
+  -> (tipc_net_lock){+..-..} ops: 4 {
+[...]
+    IN-SOFTIRQ-R at:
+                     [<c108953a>] __lock_acquire+0x64a/0x13e0
+                     [<c108a360>] lock_acquire+0x90/0x100
+                     [<c15541cd>] _raw_read_lock_bh+0x3d/0x50
+                     [<c8bb874d>] tipc_recv_msg+0x1d/0x830 [tipc]
+                     [<c8bc195f>] recv_msg+0x3f/0x50 [tipc]
+                     [<c146a5fa>] __netif_receive_skb+0x22a/0x590
+                     [<c146ab0b>] netif_receive_skb+0x2b/0xf0
+                     [<c13c43d2>] pcnet32_poll+0x292/0x780
+                     [<c146b00a>] net_rx_action+0xfa/0x1e0
+                     [<c103a4be>] __do_softirq+0xae/0x1e0
+[...]
+}
+
+>From the log, we can see three different call chains between
+CPU0 and CPU1:
+
+Time 0 on CPU0:
+
+  kernel_init()->inet_init()->dev_add_pack()
+
+At time 0, the ptype_lock is held by CPU0 in dev_add_pack();
+
+Time 1 on CPU1:
+
+  tipc_enable_bearer()->enable_bearer()->dev_add_pack()
+
+At time 1, tipc_enable_bearer() first holds tipc_net_lock, and then
+wants to take ptype_lock to register TIPC protocol handler into the
+networking stack.  But the ptype_lock has been taken by dev_add_pack()
+on CPU0, so at this time the dev_add_pack() running on CPU1 has to be
+busy looping.
+
+Time 2 on CPU0:
+
+  netif_receive_skb()->recv_msg()->tipc_recv_msg()
+
+At time 2, an incoming TIPC packet arrives at CPU0, hence
+tipc_recv_msg() will be invoked. In tipc_recv_msg(), it first wants
+to hold tipc_net_lock.  At the moment, below scenario happens:
+
+On CPU0, below is our sequence of taking locks:
+
+  lock(ptype_lock)->lock(tipc_net_lock)
+
+On CPU1, our sequence of taking locks looks like:
+
+  lock(tipc_net_lock)->lock(ptype_lock)
+
+Obviously deadlock may happen in this case.
+
+But please note the deadlock possibly doesn't occur at all when the
+first TIPC bearer is enabled.  Before enable_bearer() -- running on
+CPU1 does not hold ptype_lock, so the TIPC receive handler (i.e.
+recv_msg()) is not registered successfully via dev_add_pack(), so
+the tipc_recv_msg() cannot be called by recv_msg() even if a TIPC
+message comes to CPU0. But when the second TIPC bearer is
+registered, the deadlock can perhaps really happen.
+
+To fix it, we will push the work of registering TIPC protocol
+handler into workqueue context. After the change, both paths taking
+ptype_lock are always in process contexts, thus, the deadlock should
+never occur.
+
+Signed-off-by: Ying Xue <ying.xue@windriver.com>
+Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/eth_media.c |   15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/net/tipc/eth_media.c
++++ b/net/tipc/eth_media.c
+@@ -53,6 +53,7 @@ struct eth_bearer {
+       struct tipc_bearer *bearer;
+       struct net_device *dev;
+       struct packet_type tipc_packet_type;
++      struct work_struct setup;
+       struct work_struct cleanup;
+ };
+ 
+@@ -138,6 +139,17 @@ static int recv_msg(struct sk_buff *buf,
+ }
+ 
+ /**
++ * setup_bearer - setup association between Ethernet bearer and interface
++ */
++static void setup_bearer(struct work_struct *work)
++{
++      struct eth_bearer *eb_ptr =
++              container_of(work, struct eth_bearer, setup);
++
++      dev_add_pack(&eb_ptr->tipc_packet_type);
++}
++
++/**
+  * enable_bearer - attach TIPC bearer to an Ethernet interface
+  */
+ 
+@@ -181,7 +193,8 @@ static int enable_bearer(struct tipc_bea
+       eb_ptr->tipc_packet_type.func = recv_msg;
+       eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
+       INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
+-      dev_add_pack(&eb_ptr->tipc_packet_type);
++      INIT_WORK(&eb_ptr->setup, setup_bearer);
++      schedule_work(&eb_ptr->setup);
+ 
+       /* Associate TIPC bearer with Ethernet bearer */
+ 
diff --git a/queue-3.4/tun-signedness-bug-in-tun_get_user.patch b/queue-3.4/tun-signedness-bug-in-tun_get_user.patch

new file mode 100644 (file)

index 0000000..5769fdc
--- /dev/null
+++ b/queue-3.4/tun-signedness-bug-in-tun_get_user.patch
@@ -0,0 +1,46 @@
+From 3cf27a163c54c80879076abb04c6ce40fb6f679b Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 15 Aug 2013 15:52:57 +0300
+Subject: tun: signedness bug in tun_get_user()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit 15718ea0d844e4816dbd95d57a8a0e3e264ba90e ]
+
+The recent fix d9bf5f1309 "tun: compare with 0 instead of total_len" is
+not totally correct.  Because "len" and "sizeof()" are size_t type, that
+means they are never less than zero.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -615,8 +615,9 @@ static ssize_t tun_get_user(struct tun_s
+       int offset = 0;
+ 
+       if (!(tun->flags & TUN_NO_PI)) {
+-              if ((len -= sizeof(pi)) > count)
++              if (len < sizeof(pi))
+                       return -EINVAL;
++              len -= sizeof(pi);
+ 
+               if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi)))
+                       return -EFAULT;
+@@ -624,8 +625,9 @@ static ssize_t tun_get_user(struct tun_s
+       }
+ 
+       if (tun->flags & TUN_VNET_HDR) {
+-              if ((len -= tun->vnet_hdr_sz) > count)
++              if (len < tun->vnet_hdr_sz)
+                       return -EINVAL;
++              len -= tun->vnet_hdr_sz;
+ 
+               if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
+                       return -EFAULT;
diff --git a/queue-3.4/vhost-zerocopy-poll-vq-in-zerocopy-callback.patch b/queue-3.4/vhost-zerocopy-poll-vq-in-zerocopy-callback.patch

new file mode 100644 (file)

index 0000000..b534e32
--- /dev/null
+++ b/queue-3.4/vhost-zerocopy-poll-vq-in-zerocopy-callback.patch
@@ -0,0 +1,31 @@
+From b732f7499646e4ba41eec865761de8d2d18a73dc Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Tue, 6 Aug 2013 17:29:18 +0800
+Subject: vhost: zerocopy: poll vq in zerocopy callback
+
+From: Jason Wang <jasowang@redhat.com>
+
+commit c70aa540c7a9f67add11ad3161096fb95233aa2e upstream.
+
+We add used and signal guest in worker thread but did not poll the virtqueue
+during the zero copy callback. This may lead to missed adding and
+signalling during zerocopy. Solve this by polling the virtqueue and let it
+wakeup the worker during callback.
+
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/vhost.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -1603,6 +1603,7 @@ void vhost_zerocopy_callback(struct ubuf
+       struct vhost_ubuf_ref *ubufs = ubuf->ctx;
+       struct vhost_virtqueue *vq = ubufs->vq;
+ 
++      vhost_poll_queue(&vq->poll);
+       /* set len = 1 to mark this desc buffers done DMA */
+       vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
+       kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 11 Sep 2013 16:13:50 +0000 (09:13 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 11 Sep 2013 16:13:50 +0000 (09:13 -0700)
queue-3.4/8139cp-fix-skb-leak-in-rx_status_loop-failure-path.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/af_key-initialize-satype-in-key_notify_policy_flush.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/bonding-modify-only-neigh_parms-owned-by-us.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/fib_trie-remove-potential-out-of-bound-access.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/htb-fix-sign-extension-bug.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/ipv6-drop-packets-with-multiple-fragmentation-headers.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/macvtap-do-not-zerocopy-if-iov-needs-more-pages-than-max_skb_frags.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/neighbour-populate-neigh_parms-on-alloc-before-calling-ndo_neigh_setup.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/net-check-net.core.somaxconn-sysctl-values.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/net-ipv6-tcp-fix-potential-use-after-free-in-tcp_v6_do_rcv.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/series	[new file with mode: 0644]	patch \| blob
queue-3.4/tcp-cubic-fix-bug-in-bictcp_acked.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/tcp-cubic-fix-overflow-error-in-bictcp_update.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/tipc-fix-lockdep-warning-during-bearer-initialization.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/tun-signedness-bug-in-tun_get_user.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/vhost-zerocopy-poll-vq-in-zerocopy-callback.patch	[new file with mode: 0644]	patch \| blob