--- /dev/null
+From b2fb347f0dae2ffea9234d3c6b4fd6ad4b75fe81 Mon Sep 17 00:00:00 2001
+From: Dave Jones <davej@redhat.com>
+Date: Fri, 9 Aug 2013 11:16:34 -0700
+Subject: 8139cp: Fix skb leak in rx_status_loop failure path.
+
+From: Dave Jones <davej@redhat.com>
+
+[ Upstream commit d06f5187469eee1b2932c02fd093d113cfc60d5e ]
+
+Introduced in cf3c4c03060b688cbc389ebc5065ebcce5653e96
+("8139cp: Add dma_mapping_error checking")
+
+Signed-off-by: Dave Jones <davej@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/8139cp.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/realtek/8139cp.c
++++ b/drivers/net/ethernet/realtek/8139cp.c
+@@ -524,6 +524,7 @@ rx_status_loop:
+ PCI_DMA_FROMDEVICE);
+ if (dma_mapping_error(&cp->pdev->dev, new_mapping)) {
+ dev->stats.rx_dropped++;
++ kfree_skb(new_skb);
+ goto rx_next;
+ }
+
--- /dev/null
+From 930e232cc73bdf918a0896ffc458902ab8897a88 Mon Sep 17 00:00:00 2001
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Mon, 18 Feb 2013 16:24:20 +0100
+Subject: af_key: initialize satype in key_notify_policy_flush()
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 85dfb745ee40232876663ae206cba35f24ab2a40 ]
+
+This field was left uninitialized. Some user daemons perform check against this
+field.
+
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/key/af_key.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/key/af_key.c
++++ b/net/key/af_key.c
+@@ -1704,6 +1704,7 @@ static int key_notify_sa_flush(const str
+ hdr->sadb_msg_pid = c->pid;
+ hdr->sadb_msg_version = PF_KEY_V2;
+ hdr->sadb_msg_errno = (uint8_t) 0;
++ hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
+ hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+ hdr->sadb_msg_reserved = 0;
+
--- /dev/null
+From f2c884d19bed59210a90449397ea9d34de0240ba Mon Sep 17 00:00:00 2001
+From: Veaceslav Falico <vfalico@redhat.com>
+Date: Fri, 2 Aug 2013 19:07:39 +0200
+Subject: bonding: modify only neigh_parms owned by us
+
+From: Veaceslav Falico <vfalico@redhat.com>
+
+[ Upstream commit 9918d5bf329d0dc5bb2d9d293bcb772bdb626e65 ]
+
+Otherwise, on neighbour creation, bond_neigh_init() will be called with a
+foreign netdev.
+
+Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -3750,11 +3750,17 @@ static int bond_neigh_init(struct neighb
+ * The bonding ndo_neigh_setup is called at init time beofre any
+ * slave exists. So we must declare proxy setup function which will
+ * be used at run time to resolve the actual slave neigh param setup.
++ *
++ * It's also called by master devices (such as vlans) to setup their
++ * underlying devices. In that case - do nothing, we're already set up from
++ * our init.
+ */
+ static int bond_neigh_setup(struct net_device *dev,
+ struct neigh_parms *parms)
+ {
+- parms->neigh_setup = bond_neigh_init;
++ /* modify only our neigh_parms */
++ if (parms->dev == dev)
++ parms->neigh_setup = bond_neigh_init;
+
+ return 0;
+ }
--- /dev/null
+From 28561e4df3d4d37ea5dbf01cd8623a5fe2dbb369 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 5 Aug 2013 11:18:49 -0700
+Subject: fib_trie: remove potential out of bound access
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit aab515d7c32a34300312416c50314e755ea6f765 ]
+
+AddressSanitizer [1] dynamic checker pointed a potential
+out of bound access in leaf_walk_rcu()
+
+We could allocate one more slot in tnode_new() to leave the prefetch()
+in-place but it looks not worth the pain.
+
+Bug added in commit 82cfbb008572b ("[IPV4] fib_trie: iterator recode")
+
+[1] :
+https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel
+
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_trie.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -71,7 +71,6 @@
+ #include <linux/init.h>
+ #include <linux/list.h>
+ #include <linux/slab.h>
+-#include <linux/prefetch.h>
+ #include <linux/export.h>
+ #include <net/net_namespace.h>
+ #include <net/ip.h>
+@@ -1772,10 +1771,8 @@ static struct leaf *leaf_walk_rcu(struct
+ if (!c)
+ continue;
+
+- if (IS_LEAF(c)) {
+- prefetch(rcu_dereference_rtnl(p->child[idx]));
++ if (IS_LEAF(c))
+ return (struct leaf *) c;
+- }
+
+ /* Rescan start scanning in new node */
+ p = (struct tnode *) c;
--- /dev/null
+From 470ca701447611fa3276dec1994b2be6fb3e3746 Mon Sep 17 00:00:00 2001
+From: stephen hemminger <stephen@networkplumber.org>
+Date: Thu, 1 Aug 2013 22:32:07 -0700
+Subject: htb: fix sign extension bug
+
+From: stephen hemminger <stephen@networkplumber.org>
+
+[ Upstream commit cbd375567f7e4811b1c721f75ec519828ac6583f ]
+
+When userspace passes a large priority value
+the assignment of the unsigned value hopt->prio
+to signed int cl->prio causes cl->prio to become negative and the
+comparison with TC_HTB_NUMPRIO is always false.
+
+The result is that HTB crashes by referencing outside
+the array when processing packets. With this patch the large value
+wraps around like other values outside the normal range.
+
+See: https://bugzilla.kernel.org/show_bug.cgi?id=60669
+
+Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_htb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -86,7 +86,7 @@ struct htb_class {
+ unsigned int children;
+ struct htb_class *parent; /* parent class */
+
+- int prio; /* these two are used only by leaves... */
++ u32 prio; /* these two are used only by leaves... */
+ int quantum; /* but stored for parent-to-leaf return */
+
+ union {
--- /dev/null
+From a2681be0cbd9c7152cecd6c9a1ab5bfaa10e2471 Mon Sep 17 00:00:00 2001
+From: Jiri Bohac <jbohac@suse.cz>
+Date: Fri, 30 Aug 2013 11:18:45 +0200
+Subject: ICMPv6: treat dest unreachable codes 5 and 6 as EACCES, not EPROTO
+
+From: Jiri Bohac <jbohac@suse.cz>
+
+[ Upstream commit 61e76b178dbe7145e8d6afa84bb4ccea71918994 ]
+
+RFC 4443 has defined two additional codes for ICMPv6 type 1 (destination
+unreachable) messages:
+ 5 - Source address failed ingress/egress policy
+ 6 - Reject route to destination
+
+Now they are treated as protocol error and icmpv6_err_convert() converts them
+to EPROTO.
+
+RFC 4443 says:
+ "Codes 5 and 6 are more informative subsets of code 1."
+
+Treat codes 5 and 6 as code 1 (EACCES)
+
+Btw, connect() returning -EPROTO confuses firefox, so that fallback to
+other/IPv4 addresses does not work:
+https://bugzilla.mozilla.org/show_bug.cgi?id=910773
+
+Signed-off-by: Jiri Bohac <jbohac@suse.cz>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/icmpv6.h | 2 ++
+ net/ipv6/icmp.c | 10 +++++++++-
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+--- a/include/linux/icmpv6.h
++++ b/include/linux/icmpv6.h
+@@ -123,6 +123,8 @@ static inline struct icmp6hdr *icmp6_hdr
+ #define ICMPV6_NOT_NEIGHBOUR 2
+ #define ICMPV6_ADDR_UNREACH 3
+ #define ICMPV6_PORT_UNREACH 4
++#define ICMPV6_POLICY_FAIL 5
++#define ICMPV6_REJECT_ROUTE 6
+
+ /*
+ * Codes for Time Exceeded
+--- a/net/ipv6/icmp.c
++++ b/net/ipv6/icmp.c
+@@ -917,6 +917,14 @@ static const struct icmp6_err {
+ .err = ECONNREFUSED,
+ .fatal = 1,
+ },
++ { /* POLICY_FAIL */
++ .err = EACCES,
++ .fatal = 1,
++ },
++ { /* REJECT_ROUTE */
++ .err = EACCES,
++ .fatal = 1,
++ },
+ };
+
+ int icmpv6_err_convert(u8 type, u8 code, int *err)
+@@ -928,7 +936,7 @@ int icmpv6_err_convert(u8 type, u8 code,
+ switch (type) {
+ case ICMPV6_DEST_UNREACH:
+ fatal = 1;
+- if (code <= ICMPV6_PORT_UNREACH) {
++ if (code < ARRAY_SIZE(tab_unreach)) {
+ *err = tab_unreach[code].err;
+ fatal = tab_unreach[code].fatal;
+ }
--- /dev/null
+From b124cefb7fb9d7d58f1b7579bedfd8b8cdad2b11 Mon Sep 17 00:00:00 2001
+From: Thomas Graf <tgraf@suug.ch>
+Date: Tue, 3 Sep 2013 13:37:01 +0200
+Subject: ipv6: Don't depend on per socket memory for neighbour discovery messages
+
+From: Thomas Graf <tgraf@suug.ch>
+
+[ Upstream commit 25a6e6b84fba601eff7c28d30da8ad7cfbef0d43 ]
+
+Allocating skbs when sending out neighbour discovery messages
+currently uses sock_alloc_send_skb() based on a per net namespace
+socket and thus share a socket wmem buffer space.
+
+If a netdevice is temporarily unable to transmit due to carrier
+loss or for other reasons, the queued up ndisc messages will consume
+all of the wmem space and will thus prevent any more skbs from
+being allocated even for netdevices that are able to transmit packets.
+
+The number of neighbour discovery messages sent is very limited,
+use of alloc_skb() bypasses the socket wmem buffer size enforcement
+while the manual call to skb_set_owner_w() maintains the socket
+reference needed for the IPv6 output path.
+
+This patch was originally posted by Eric Dumazet in a modified
+form.
+
+Signed-off-by: Thomas Graf <tgraf@suug.ch>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Cc: Stephen Warren <swarren@wwwdotorg.org>
+Cc: Fabio Estevam <festevam@gmail.com>
+Tested-by: Fabio Estevam <fabio.estevam@freescale.com>
+Tested-by: Stephen Warren <swarren@nvidia.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ndisc.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/net/ipv6/ndisc.c
++++ b/net/ipv6/ndisc.c
+@@ -441,7 +441,6 @@ struct sk_buff *ndisc_build_skb(struct n
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
+ int len;
+- int err;
+ u8 *opt;
+
+ if (!dev->addr_len)
+@@ -451,14 +450,12 @@ struct sk_buff *ndisc_build_skb(struct n
+ if (llinfo)
+ len += ndisc_opt_addr_space(dev);
+
+- skb = sock_alloc_send_skb(sk,
+- (MAX_HEADER + sizeof(struct ipv6hdr) +
+- len + hlen + tlen),
+- 1, &err);
++ skb = alloc_skb((MAX_HEADER + sizeof(struct ipv6hdr) +
++ len + hlen + tlen), GFP_ATOMIC);
+ if (!skb) {
+ ND_PRINTK0(KERN_ERR
+- "ICMPv6 ND: %s() failed to allocate an skb, err=%d.\n",
+- __func__, err);
++ "ICMPv6 ND: %s() failed to allocate an skb.\n",
++ __func__);
+ return NULL;
+ }
+
+@@ -486,6 +483,11 @@ struct sk_buff *ndisc_build_skb(struct n
+ csum_partial(hdr,
+ len, 0));
+
++ /* Manually assign socket ownership as we avoid calling
++ * sock_alloc_send_pskb() to bypass wmem buffer limits
++ */
++ skb_set_owner_w(skb, sk);
++
+ return skb;
+ }
+
--- /dev/null
+From 2d8153827908cca60567ab7dd7abe92affca8823 Mon Sep 17 00:00:00 2001
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Wed, 7 Aug 2013 02:34:31 +0200
+Subject: ipv6: don't stop backtracking in fib6_lookup_1 if subtree does not match
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit 3e3be275851bc6fc90bfdcd732cd95563acd982b ]
+
+In case a subtree did not match we currently stop backtracking and return
+NULL (root table from fib_lookup). This could result in invalid routing
+table lookups when using subtrees.
+
+Instead continue to backtrack until a valid subtree or node is found
+and return this match.
+
+Also remove unneeded NULL check.
+
+Reported-by: Teco Boot <teco@inf-net.nl>
+Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+Cc: David Lamparter <equinox@diac24.net>
+Cc: <boutier@pps.univ-paris-diderot.fr>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -949,14 +949,22 @@ static struct fib6_node * fib6_lookup_1(
+
+ if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
+ #ifdef CONFIG_IPV6_SUBTREES
+- if (fn->subtree)
+- fn = fib6_lookup_1(fn->subtree, args + 1);
++ if (fn->subtree) {
++ struct fib6_node *sfn;
++ sfn = fib6_lookup_1(fn->subtree,
++ args + 1);
++ if (!sfn)
++ goto backtrack;
++ fn = sfn;
++ }
+ #endif
+- if (!fn || fn->fn_flags & RTN_RTINFO)
++ if (fn->fn_flags & RTN_RTINFO)
+ return fn;
+ }
+ }
+-
++#ifdef CONFIG_IPV6_SUBTREES
++backtrack:
++#endif
+ if (fn->fn_flags & RTN_ROOT)
+ break;
+
--- /dev/null
+From 301d2f6834afe6f4049b9193a85b05bbb65ffb6a Mon Sep 17 00:00:00 2001
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Fri, 16 Aug 2013 13:30:07 +0200
+Subject: ipv6: drop packets with multiple fragmentation headers
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit f46078cfcd77fa5165bf849f5e568a7ac5fa569c ]
+
+It is not allowed for an ipv6 packet to contain multiple fragmentation
+headers. So discard packets which were already reassembled by
+fragmentation logic and send back a parameter problem icmp.
+
+The updates for RFC 6980 will come in later, I have to do a bit more
+research here.
+
+Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ipv6.h | 1 +
+ net/ipv6/reassembly.c | 5 +++++
+ 2 files changed, 6 insertions(+)
+
+--- a/include/linux/ipv6.h
++++ b/include/linux/ipv6.h
+@@ -260,6 +260,7 @@ struct inet6_skb_parm {
+ #define IP6SKB_XFRM_TRANSFORMED 1
+ #define IP6SKB_FORWARDED 2
+ #define IP6SKB_REROUTED 4
++#define IP6SKB_FRAGMENTED 16
+ };
+
+ #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb))
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -516,6 +516,7 @@ static int ip6_frag_reasm(struct frag_qu
+ head->tstamp = fq->q.stamp;
+ ipv6_hdr(head)->payload_len = htons(payload_len);
+ IP6CB(head)->nhoff = nhoff;
++ IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
+
+ /* Yes, and fold redundant checksum back. 8) */
+ if (head->ip_summed == CHECKSUM_COMPLETE)
+@@ -551,6 +552,9 @@ static int ipv6_frag_rcv(struct sk_buff
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
++ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
++ goto fail_hdr;
++
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
+
+ /* Jumbo payload inhibits frag. header */
+@@ -571,6 +575,7 @@ static int ipv6_frag_rcv(struct sk_buff
+ ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
+
+ IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
++ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+ return 1;
+ }
+
--- /dev/null
+From 79ada7773990ac5e464479790e8b4dc8ab0d48ac Mon Sep 17 00:00:00 2001
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Fri, 16 Aug 2013 13:02:27 +0200
+Subject: ipv6: remove max_addresses check from ipv6_create_tempaddr
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit 4b08a8f1bd8cb4541c93ec170027b4d0782dab52 ]
+
+Because of the max_addresses check attackers were able to disable privacy
+extensions on an interface by creating enough autoconfigured addresses:
+
+<http://seclists.org/oss-sec/2012/q4/292>
+
+But the check is not actually needed: max_addresses protects the
+kernel from installing too many ipv6 addresses on an interface and guards
+addrconf_prefix_rcv against installing further addresses as soon as this limit
+is reached. We only generate temporary addresses in direct response of
+a new address showing up. As soon as we filled up the maximum number of
+addresses of an interface, we stop installing more addresses and thus
+also stop generating more temp addresses.
+
+Even if the attacker tries to generate a lot of temporary addresses
+by announcing a prefix and removing it again (lifetime == 0) we won't
+install more temp addresses, because the temporary addresses do count
+to the maximum number of addresses, thus we would stop installing new
+autoconfigured addresses when the limit is reached.
+
+This patch fixes CVE-2013-0343 (but other layer-2 attacks are still
+possible).
+
+Thanks to Ding Tianhong for bringing this topic up again.
+
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Cc: Ding Tianhong <dingtianhong@huawei.com>
+Cc: George Kargiotakis <kargig@void.gr>
+Cc: P J P <ppandit@redhat.com>
+Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+Acked-by: Ding Tianhong <dingtianhong@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -910,12 +910,10 @@ retry:
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ addr_flags |= IFA_F_OPTIMISTIC;
+
+- ift = !max_addresses ||
+- ipv6_count_addresses(idev) < max_addresses ?
+- ipv6_add_addr(idev, &addr, tmp_plen,
+- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+- addr_flags) : NULL;
+- if (!ift || IS_ERR(ift)) {
++ ift = ipv6_add_addr(idev, &addr, tmp_plen,
++ ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
++ addr_flags);
++ if (IS_ERR(ift)) {
+ in6_ifa_put(ifp);
+ in6_dev_put(idev);
+ printk(KERN_INFO
--- /dev/null
+From 11613badc2b7ff4c08b8503ea2580d70117b995a Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Tue, 6 Aug 2013 17:29:19 +0800
+Subject: macvtap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS
+
+From: Jason Wang <jasowang@redhat.com>
+
+commit ece793fcfc417b3925844be88a6a6dc82ae8f7c6 upstream.
+
+We try to linearize part of the skb when the number of iov is greater than
+MAX_SKB_FRAGS. This is not enough since each single vector may occupy more than
+one page, so zerocopy_sg_from_iovec() may still fail and may break the guest
+network.
+
+Solve this problem by calculating the pages needed for iov before trying to do
+zerocopy and switch to use copy instead of zerocopy if it needs more than
+MAX_SKB_FRAGS.
+
+This is done through introducing a new helper to count the pages for iov, and
+call uarg->callback() manually when switching from zerocopy to copy to notify
+vhost.
+
+We can do further optimization on top.
+
+This bug was introduced by b92946e2919134ebe2a4083e4302236295ea2a73
+(macvtap: zerocopy: validate vectors before building skb).
+
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvtap.c | 62 +++++++++++++++++++++++++++++---------------------
+ 1 file changed, 37 insertions(+), 25 deletions(-)
+
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -642,6 +642,28 @@ static int macvtap_skb_to_vnet_hdr(const
+ return 0;
+ }
+
++static unsigned long iov_pages(const struct iovec *iv, int offset,
++ unsigned long nr_segs)
++{
++ unsigned long seg, base;
++ int pages = 0, len, size;
++
++ while (nr_segs && (offset >= iv->iov_len)) {
++ offset -= iv->iov_len;
++ ++iv;
++ --nr_segs;
++ }
++
++ for (seg = 0; seg < nr_segs; seg++) {
++ base = (unsigned long)iv[seg].iov_base + offset;
++ len = iv[seg].iov_len - offset;
++ size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
++ pages += size;
++ offset = 0;
++ }
++
++ return pages;
++}
+
+ /* Get packet from user space buffer */
+ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
+@@ -688,31 +710,15 @@ static ssize_t macvtap_get_user(struct m
+ if (unlikely(count > UIO_MAXIOV))
+ goto err;
+
+- if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY))
+- zerocopy = true;
+-
+- if (zerocopy) {
+- /* Userspace may produce vectors with count greater than
+- * MAX_SKB_FRAGS, so we need to linearize parts of the skb
+- * to let the rest of data to be fit in the frags.
+- */
+- if (count > MAX_SKB_FRAGS) {
+- copylen = iov_length(iv, count - MAX_SKB_FRAGS);
+- if (copylen < vnet_hdr_len)
+- copylen = 0;
+- else
+- copylen -= vnet_hdr_len;
+- }
+- /* There are 256 bytes to be copied in skb, so there is enough
+- * room for skb expand head in case it is used.
+- * The rest buffer is mapped from userspace.
+- */
+- if (copylen < vnet_hdr.hdr_len)
+- copylen = vnet_hdr.hdr_len;
+- if (!copylen)
+- copylen = GOODCOPY_LEN;
++ if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
++ copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN;
+ linear = copylen;
+- } else {
++ if (iov_pages(iv, vnet_hdr_len + copylen, count)
++ <= MAX_SKB_FRAGS)
++ zerocopy = true;
++ }
++
++ if (!zerocopy) {
+ copylen = len;
+ linear = vnet_hdr.hdr_len;
+ }
+@@ -724,9 +730,15 @@ static ssize_t macvtap_get_user(struct m
+
+ if (zerocopy)
+ err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
+- else
++ else {
+ err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
+ len);
++ if (!err && m && m->msg_control) {
++ struct ubuf_info *uarg = m->msg_control;
++ uarg->callback(uarg);
++ }
++ }
++
+ if (err)
+ goto err_kfree;
+
--- /dev/null
+From 864c43185acc3b0cb04dd2d15d4f1a5a8ff6a557 Mon Sep 17 00:00:00 2001
+From: Veaceslav Falico <vfalico@redhat.com>
+Date: Fri, 2 Aug 2013 19:07:38 +0200
+Subject: neighbour: populate neigh_parms on alloc before calling ndo_neigh_setup
+
+From: Veaceslav Falico <vfalico@redhat.com>
+
+[ Upstream commit 63134803a6369dcf7dddf7f0d5e37b9566b308d2 ]
+
+dev->ndo_neigh_setup() might need some of the values of neigh_parms, so
+populate them before calling it.
+
+Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/neighbour.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -1442,16 +1442,18 @@ struct neigh_parms *neigh_parms_alloc(st
+ atomic_set(&p->refcnt, 1);
+ p->reachable_time =
+ neigh_rand_reach_time(p->base_reachable_time);
++ dev_hold(dev);
++ p->dev = dev;
++ write_pnet(&p->net, hold_net(net));
++ p->sysctl_table = NULL;
+
+ if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
++ release_net(net);
++ dev_put(dev);
+ kfree(p);
+ return NULL;
+ }
+
+- dev_hold(dev);
+- p->dev = dev;
+- write_pnet(&p->net, hold_net(net));
+- p->sysctl_table = NULL;
+ write_lock_bh(&tbl->lock);
+ p->next = tbl->parms.next;
+ tbl->parms.next = p;
--- /dev/null
+From 7a46c6dcf91c3025160c9163f5b5a82400c3c078 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Thu, 29 Aug 2013 23:55:05 +0200
+Subject: net: bridge: convert MLDv2 Query MRC into msecs_to_jiffies for max_delay
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit 2d98c29b6fb3de44d9eaa73c09f9cf7209346383 ]
+
+While looking into MLDv1/v2 code, I noticed that bridging code does
+not convert its max delay into jiffies for MLDv2 messages as we do
+in core IPv6' multicast code.
+
+RFC3810, 5.1.3. Maximum Response Code says:
+
+ The Maximum Response Code field specifies the maximum time allowed
+ before sending a responding Report. The actual time allowed, called
+ the Maximum Response Delay, is represented in units of milliseconds,
+ and is derived from the Maximum Response Code as follows: [...]
+
+As we update timers that work with jiffies, we need to convert it.
+
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Cc: Linus Lüssing <linus.luessing@web.de>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_multicast.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -1155,7 +1155,8 @@ static int br_ip6_multicast_query(struct
+ mld2q = (struct mld2_query *)icmp6_hdr(skb);
+ if (!mld2q->mld2q_nsrcs)
+ group = &mld2q->mld2q_mca;
+- max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(mld2q->mld2q_mrc) : 1;
++
++ max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL);
+ }
+
+ if (!group)
--- /dev/null
+From 52ab602137b1f0948243d36c6e393fd0643aaa96 Mon Sep 17 00:00:00 2001
+From: Roman Gushchin <klamm@yandex-team.ru>
+Date: Fri, 2 Aug 2013 18:36:40 +0400
+Subject: net: check net.core.somaxconn sysctl values
+
+From: Roman Gushchin <klamm@yandex-team.ru>
+
+[ Upstream commit 5f671d6b4ec3e6d66c2a868738af2cdea09e7509 ]
+
+It's possible to assign an invalid value to the net.core.somaxconn
+sysctl variable, because there are no checks at all.
+
+The sk_max_ack_backlog field of the sock structure is defined as
+unsigned short. Therefore, the backlog argument in inet_listen()
+shouldn't exceed USHRT_MAX. The backlog argument in the listen() syscall
+is truncated to the somaxconn value. So, the somaxconn value shouldn't
+exceed 65535 (USHRT_MAX).
+Also, negative values of somaxconn are meaningless.
+
+before:
+$ sysctl -w net.core.somaxconn=256
+net.core.somaxconn = 256
+$ sysctl -w net.core.somaxconn=65536
+net.core.somaxconn = 65536
+$ sysctl -w net.core.somaxconn=-100
+net.core.somaxconn = -100
+
+after:
+$ sysctl -w net.core.somaxconn=256
+net.core.somaxconn = 256
+$ sysctl -w net.core.somaxconn=65536
+error: "Invalid argument" setting key "net.core.somaxconn"
+$ sysctl -w net.core.somaxconn=-100
+error: "Invalid argument" setting key "net.core.somaxconn"
+
+Based on a prior patch from Changli Gao.
+
+Signed-off-by: Roman Gushchin <klamm@yandex-team.ru>
+Reported-by: Changli Gao <xiaosuo@gmail.com>
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sysctl_net_core.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -19,6 +19,9 @@
+ #include <net/sock.h>
+ #include <net/net_ratelimit.h>
+
++static int zero = 0;
++static int ushort_max = USHRT_MAX;
++
+ #ifdef CONFIG_RPS
+ static int rps_sock_flow_sysctl(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+@@ -197,7 +200,9 @@ static struct ctl_table netns_core_table
+ .data = &init_net.core.sysctl_somaxconn,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = proc_dointvec
++ .extra1 = &zero,
++ .extra2 = &ushort_max,
++ .proc_handler = proc_dointvec_minmax
+ },
+ { }
+ };
--- /dev/null
+From c596c9f2530e3d305d2e4d9e3491df8fab08c97f Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Tue, 3 Sep 2013 19:29:12 +0200
+Subject: net: ipv6: tcp: fix potential use after free in tcp_v6_do_rcv
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit 3a1c756590633c0e86df606e5c618c190926a0df ]
+
+In tcp_v6_do_rcv() code, when processing pkt options, we solely work
+on our skb clone opt_skb that we've created earlier before entering
+tcp_rcv_established() on our way. However, only in condition ...
+
+ if (np->rxopt.bits.rxtclass)
+ np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+
+... we work on skb itself. As we extract every other information out
+of opt_skb in ipv6_pktoptions path, this seems wrong, since skb can
+already be released by tcp_rcv_established() earlier on. When we try
+to access it in ipv6_hdr(), we will dereference freed skb.
+
+[ Bug added by commit 4c507d2897bd9b ("net: implement IP_RECVTOS for
+ IP_PKTOPTIONS") ]
+
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Jiri Benc <jbenc@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/tcp_ipv6.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1571,7 +1571,7 @@ ipv6_pktoptions:
+ if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
+ np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ if (np->rxopt.bits.rxtclass)
+- np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
++ np->rcv_tclass = ipv6_tclass(ipv6_hdr(opt_skb));
+ if (ipv6_opt_accepted(sk, opt_skb)) {
+ skb_set_owner_r(opt_skb, sk);
+ opt_skb = xchg(&np->pktoptions, opt_skb);
--- /dev/null
+htb-fix-sign-extension-bug.patch
+net-check-net.core.somaxconn-sysctl-values.patch
+neighbour-populate-neigh_parms-on-alloc-before-calling-ndo_neigh_setup.patch
+bonding-modify-only-neigh_parms-owned-by-us.patch
+fib_trie-remove-potential-out-of-bound-access.patch
+tcp-cubic-fix-overflow-error-in-bictcp_update.patch
+tcp-cubic-fix-bug-in-bictcp_acked.patch
+ipv6-don-t-stop-backtracking-in-fib6_lookup_1-if-subtree-does-not-match.patch
+8139cp-fix-skb-leak-in-rx_status_loop-failure-path.patch
+tun-signedness-bug-in-tun_get_user.patch
+ipv6-remove-max_addresses-check-from-ipv6_create_tempaddr.patch
+ipv6-drop-packets-with-multiple-fragmentation-headers.patch
+ipv6-don-t-depend-on-per-socket-memory-for-neighbour-discovery-messages.patch
+net-bridge-convert-mldv2-query-mrc-into-msecs_to_jiffies-for-max_delay.patch
+icmpv6-treat-dest-unreachable-codes-5-and-6-as-eacces-not-eproto.patch
+net-ipv6-tcp-fix-potential-use-after-free-in-tcp_v6_do_rcv.patch
+vhost-zerocopy-poll-vq-in-zerocopy-callback.patch
+macvtap-do-not-zerocopy-if-iov-needs-more-pages-than-max_skb_frags.patch
+tipc-fix-lockdep-warning-during-bearer-initialization.patch
+af_key-initialize-satype-in-key_notify_policy_flush.patch
--- /dev/null
+From 6c9eced920a991497673accec4df3a17ca3ee1a4 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 5 Aug 2013 20:05:12 -0700
+Subject: tcp: cubic: fix bug in bictcp_acked()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit cd6b423afd3c08b27e1fed52db828ade0addbc6b ]
+
+While investigating a strange increase of retransmit rates
+on hosts ~24 days after boot, Van found hystart was disabled
+if ca->epoch_start was 0, as following condition is true
+when tcp_time_stamp high order bit is set.
+
+(s32)(tcp_time_stamp - ca->epoch_start) < HZ
+
+Quoting Van :
+
+ At initialization & after every loss ca->epoch_start is set to zero so
+ I believe that the above line will turn off hystart as soon as the 2^31
+ bit is set in tcp_time_stamp & hystart will stay off for 24 days.
+ I think we've observed that cubic's restart is too aggressive without
+ hystart so this might account for the higher drop rate we observe.
+
+Diagnosed-by: Van Jacobson <vanj@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_cubic.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_cubic.c
++++ b/net/ipv4/tcp_cubic.c
+@@ -416,7 +416,7 @@ static void bictcp_acked(struct sock *sk
+ return;
+
+ /* Discard delay samples right after fast recovery */
+- if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
++ if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+ return;
+
+ delay = (rtt_us << 3) / USEC_PER_MSEC;
--- /dev/null
+From 9b5d5463fad24e4487187d9bb64f03921f108aeb Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 5 Aug 2013 17:10:15 -0700
+Subject: tcp: cubic: fix overflow error in bictcp_update()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 2ed0edf9090bf4afa2c6fc4f38575a85a80d4b20 ]
+
+commit 17a6e9f1aa9 ("tcp_cubic: fix clock dependency") added an
+overflow error in bictcp_update() in the following code:
+
+/* change the unit from HZ to bictcp_HZ */
+t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) -
+ ca->epoch_start) << BICTCP_HZ) / HZ;
+
+Because msecs_to_jiffies() returns unsigned long, the compiler does
+implicit type promotion.
+
+We really want to constrain (tcp_time_stamp - ca->epoch_start)
+to a signed 32bit value, or else 't' has unexpected high values.
+
+This bug triggers an increase of retransmit rates ~24 days after
+boot [1], as the high order bit of tcp_time_stamp flips.
+
+[1] for hosts with HZ=1000
+
+Big thanks to Van Jacobson for spotting this problem.
+
+Diagnosed-by: Van Jacobson <vanj@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Stephen Hemminger <stephen@networkplumber.org>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_cubic.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_cubic.c
++++ b/net/ipv4/tcp_cubic.c
+@@ -206,8 +206,8 @@ static u32 cubic_root(u64 a)
+ */
+ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
+ {
+- u64 offs;
+- u32 delta, t, bic_target, max_cnt;
++ u32 delta, bic_target, max_cnt;
++ u64 offs, t;
+
+ ca->ack_cnt++; /* count the number of ACKs */
+
+@@ -250,9 +250,11 @@ static inline void bictcp_update(struct
+ * if the cwnd < 1 million packets !!!
+ */
+
++ t = (s32)(tcp_time_stamp - ca->epoch_start);
++ t += msecs_to_jiffies(ca->delay_min >> 3);
+ /* change the unit from HZ to bictcp_HZ */
+- t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
+- - ca->epoch_start) << BICTCP_HZ) / HZ;
++ t <<= BICTCP_HZ;
++ do_div(t, HZ);
+
+ if (t < ca->bic_K) /* t - K */
+ offs = ca->bic_K - t;
--- /dev/null
+From 9ee9730a92ab8f0bf0e2b3994a9be5fc82380b7c Mon Sep 17 00:00:00 2001
+From: Ying Xue <ying.xue@windriver.com>
+Date: Thu, 16 Aug 2012 12:09:07 +0000
+Subject: tipc: fix lockdep warning during bearer initialization
+
+From: Ying Xue <ying.xue@windriver.com>
+
+[ Upstream commit 4225a398c1352a7a5c14dc07277cb5cc4473983b ]
+
+When the lockdep validator is enabled, it will report the below
+warning when we enable a TIPC bearer:
+
+[ INFO: possible irq lock inversion dependency detected ]
+---------------------------------------------------------
+Possible interrupt unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ lock(ptype_lock);
+ local_irq_disable();
+ lock(tipc_net_lock);
+ lock(ptype_lock);
+ <Interrupt>
+ lock(tipc_net_lock);
+
+ *** DEADLOCK ***
+
+the shortest dependencies between 2nd lock and 1st lock:
+ -> (ptype_lock){+.+...} ops: 10 {
+[...]
+SOFTIRQ-ON-W at:
+ [<c1089418>] __lock_acquire+0x528/0x13e0
+ [<c108a360>] lock_acquire+0x90/0x100
+ [<c1553c38>] _raw_spin_lock+0x38/0x50
+ [<c14651ca>] dev_add_pack+0x3a/0x60
+ [<c182da75>] arp_init+0x1a/0x48
+ [<c182dce5>] inet_init+0x181/0x27e
+ [<c1001114>] do_one_initcall+0x34/0x170
+ [<c17f7329>] kernel_init+0x110/0x1b2
+ [<c155b6a2>] kernel_thread_helper+0x6/0x10
+[...]
+ ... key at: [<c17e4b10>] ptype_lock+0x10/0x20
+ ... acquired at:
+ [<c108a360>] lock_acquire+0x90/0x100
+ [<c1553c38>] _raw_spin_lock+0x38/0x50
+ [<c14651ca>] dev_add_pack+0x3a/0x60
+ [<c8bc18d2>] enable_bearer+0xf2/0x140 [tipc]
+ [<c8bb283a>] tipc_enable_bearer+0x1ba/0x450 [tipc]
+ [<c8bb3a04>] tipc_cfg_do_cmd+0x5c4/0x830 [tipc]
+ [<c8bbc032>] handle_cmd+0x42/0xd0 [tipc]
+ [<c148e802>] genl_rcv_msg+0x232/0x280
+ [<c148d3f6>] netlink_rcv_skb+0x86/0xb0
+ [<c148e5bc>] genl_rcv+0x1c/0x30
+ [<c148d144>] netlink_unicast+0x174/0x1f0
+ [<c148ddab>] netlink_sendmsg+0x1eb/0x2d0
+ [<c1456bc1>] sock_aio_write+0x161/0x170
+ [<c1135a7c>] do_sync_write+0xac/0xf0
+ [<c11360f6>] vfs_write+0x156/0x170
+ [<c11361e2>] sys_write+0x42/0x70
+ [<c155b0df>] sysenter_do_call+0x12/0x38
+[...]
+}
+ -> (tipc_net_lock){+..-..} ops: 4 {
+[...]
+ IN-SOFTIRQ-R at:
+ [<c108953a>] __lock_acquire+0x64a/0x13e0
+ [<c108a360>] lock_acquire+0x90/0x100
+ [<c15541cd>] _raw_read_lock_bh+0x3d/0x50
+ [<c8bb874d>] tipc_recv_msg+0x1d/0x830 [tipc]
+ [<c8bc195f>] recv_msg+0x3f/0x50 [tipc]
+ [<c146a5fa>] __netif_receive_skb+0x22a/0x590
+ [<c146ab0b>] netif_receive_skb+0x2b/0xf0
+ [<c13c43d2>] pcnet32_poll+0x292/0x780
+ [<c146b00a>] net_rx_action+0xfa/0x1e0
+ [<c103a4be>] __do_softirq+0xae/0x1e0
+[...]
+}
+
+>From the log, we can see three different call chains between
+CPU0 and CPU1:
+
+Time 0 on CPU0:
+
+ kernel_init()->inet_init()->dev_add_pack()
+
+At time 0, the ptype_lock is held by CPU0 in dev_add_pack();
+
+Time 1 on CPU1:
+
+ tipc_enable_bearer()->enable_bearer()->dev_add_pack()
+
+At time 1, tipc_enable_bearer() first holds tipc_net_lock, and then
+wants to take ptype_lock to register TIPC protocol handler into the
+networking stack. But the ptype_lock has been taken by dev_add_pack()
+on CPU0, so at this time the dev_add_pack() running on CPU1 has to be
+busy looping.
+
+Time 2 on CPU0:
+
+ netif_receive_skb()->recv_msg()->tipc_recv_msg()
+
+At time 2, an incoming TIPC packet arrives at CPU0, hence
+tipc_recv_msg() will be invoked. In tipc_recv_msg(), it first wants
+to hold tipc_net_lock. At the moment, below scenario happens:
+
+On CPU0, below is our sequence of taking locks:
+
+ lock(ptype_lock)->lock(tipc_net_lock)
+
+On CPU1, our sequence of taking locks looks like:
+
+ lock(tipc_net_lock)->lock(ptype_lock)
+
+Obviously deadlock may happen in this case.
+
+But please note the deadlock possibly doesn't occur at all when the
+first TIPC bearer is enabled. As long as enable_bearer() -- running on
+CPU1 -- does not hold ptype_lock, the TIPC receive handler (i.e.
+recv_msg()) is not yet registered via dev_add_pack(), so
+tipc_recv_msg() cannot be called by recv_msg() even if a TIPC
+message comes to CPU0. But when the second TIPC bearer is
+registered, the deadlock can perhaps really happen.
+
+To fix it, we will push the work of registering TIPC protocol
+handler into workqueue context. After the change, both paths taking
+ptype_lock are always in process contexts, thus, the deadlock should
+never occur.
+
+Signed-off-by: Ying Xue <ying.xue@windriver.com>
+Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/eth_media.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/net/tipc/eth_media.c
++++ b/net/tipc/eth_media.c
+@@ -53,6 +53,7 @@ struct eth_bearer {
+ struct tipc_bearer *bearer;
+ struct net_device *dev;
+ struct packet_type tipc_packet_type;
++ struct work_struct setup;
+ struct work_struct cleanup;
+ };
+
+@@ -138,6 +139,17 @@ static int recv_msg(struct sk_buff *buf,
+ }
+
+ /**
++ * setup_bearer - setup association between Ethernet bearer and interface
++ */
++static void setup_bearer(struct work_struct *work)
++{
++ struct eth_bearer *eb_ptr =
++ container_of(work, struct eth_bearer, setup);
++
++ dev_add_pack(&eb_ptr->tipc_packet_type);
++}
++
++/**
+ * enable_bearer - attach TIPC bearer to an Ethernet interface
+ */
+
+@@ -181,7 +193,8 @@ static int enable_bearer(struct tipc_bea
+ eb_ptr->tipc_packet_type.func = recv_msg;
+ eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
+ INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
+- dev_add_pack(&eb_ptr->tipc_packet_type);
++ INIT_WORK(&eb_ptr->setup, setup_bearer);
++ schedule_work(&eb_ptr->setup);
+
+ /* Associate TIPC bearer with Ethernet bearer */
+
--- /dev/null
+From 3cf27a163c54c80879076abb04c6ce40fb6f679b Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 15 Aug 2013 15:52:57 +0300
+Subject: tun: signedness bug in tun_get_user()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit 15718ea0d844e4816dbd95d57a8a0e3e264ba90e ]
+
+The recent fix d9bf5f1309 "tun: compare with 0 instead of total_len" is
+not totally correct. Because "len" and "sizeof()" are size_t type, that
+means they are never less than zero.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -615,8 +615,9 @@ static ssize_t tun_get_user(struct tun_s
+ int offset = 0;
+
+ if (!(tun->flags & TUN_NO_PI)) {
+- if ((len -= sizeof(pi)) > count)
++ if (len < sizeof(pi))
+ return -EINVAL;
++ len -= sizeof(pi);
+
+ if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi)))
+ return -EFAULT;
+@@ -624,8 +625,9 @@ static ssize_t tun_get_user(struct tun_s
+ }
+
+ if (tun->flags & TUN_VNET_HDR) {
+- if ((len -= tun->vnet_hdr_sz) > count)
++ if (len < tun->vnet_hdr_sz)
+ return -EINVAL;
++ len -= tun->vnet_hdr_sz;
+
+ if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
+ return -EFAULT;
--- /dev/null
+From b732f7499646e4ba41eec865761de8d2d18a73dc Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Tue, 6 Aug 2013 17:29:18 +0800
+Subject: vhost: zerocopy: poll vq in zerocopy callback
+
+From: Jason Wang <jasowang@redhat.com>
+
+commit c70aa540c7a9f67add11ad3161096fb95233aa2e upstream.
+
+We add used and signal the guest in the worker thread but did not poll the
+virtqueue during the zerocopy callback. This may lead to missed adding and
+signalling during zerocopy. Solve this by polling the virtqueue and letting
+it wake up the worker during the callback.
+
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/vhost.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -1603,6 +1603,7 @@ void vhost_zerocopy_callback(struct ubuf
+ struct vhost_ubuf_ref *ubufs = ubuf->ctx;
+ struct vhost_virtqueue *vq = ubufs->vq;
+
++ vhost_poll_queue(&vq->poll);
+ /* set len = 1 to mark this desc buffers done DMA */
+ vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
+ kref_put(&ubufs->kref, vhost_zerocopy_done_signal);