--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 10 Feb 2014 11:42:35 -0800
+Subject: 6lowpan: fix lockdep splats
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 20e7c4e80dcd01dad5e6c8b32455228b8fe9c619 ]
+
+When a device's ndo_start_xmit() calls dev_queue_xmit() again, lockdep
+can complain because dev_queue_xmit() is re-entered while the spinlocks
+protecting the tx queues share a common lockdep class.
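+
+A sketch of the pattern that triggers the splat (hypothetical,
+simplified code, not the actual 6lowpan xmit routine):
+
+    static netdev_tx_t stacked_xmit(struct sk_buff *skb, struct net_device *dev)
+    {
+        skb->dev = priv(dev)->real_dev; /* hand off to the lower device */
+        return dev_queue_xmit(skb);     /* dev_queue_xmit() re-entered */
+    }
+
+Without a distinct lock class for the virtual device, lockdep sees the
+same tx-queue lock class taken twice on one call chain and reports a
+false positive deadlock.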
+
+The same issue was fixed for bonding/l2tp/ppp in commits
+
+0daa2303028a6 ("[PATCH] bonding: lockdep annotation")
+49ee49202b4ac ("bonding: set qdisc_tx_busylock to avoid LOCKDEP splat")
+23d3b8bfb8eb2 ("net: qdisc busylock needs lockdep annotations")
+303c07db487be ("ppp: set qdisc_tx_busylock to avoid LOCKDEP splat")
+
+Reported-by: Alexander Aring <alex.aring@gmail.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Tested-by: Alexander Aring <alex.aring@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ieee802154/6lowpan.c | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/net/ieee802154/6lowpan.c
++++ b/net/ieee802154/6lowpan.c
+@@ -1249,7 +1249,27 @@ static struct header_ops lowpan_header_o
+ .create = lowpan_header_create,
+ };
+
++static struct lock_class_key lowpan_tx_busylock;
++static struct lock_class_key lowpan_netdev_xmit_lock_key;
++
++static void lowpan_set_lockdep_class_one(struct net_device *dev,
++ struct netdev_queue *txq,
++ void *_unused)
++{
++ lockdep_set_class(&txq->_xmit_lock,
++ &lowpan_netdev_xmit_lock_key);
++}
++
++
++static int lowpan_dev_init(struct net_device *dev)
++{
++ netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL);
++ dev->qdisc_tx_busylock = &lowpan_tx_busylock;
++ return 0;
++}
++
+ static const struct net_device_ops lowpan_netdev_ops = {
++ .ndo_init = lowpan_dev_init,
+ .ndo_start_xmit = lowpan_xmit,
+ .ndo_set_mac_address = lowpan_set_address,
+ };
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Richard Yao <ryao@gentoo.org>
+Date: Sat, 8 Feb 2014 19:32:01 -0500
+Subject: 9p/trans_virtio.c: Fix broken zero-copy on vmalloc() buffers
+
+From: Richard Yao <ryao@gentoo.org>
+
+[ Upstream commit b6f52ae2f0d32387bde2b89883e3b64d88b9bfe8 ]
+
+The 9p-virtio transport does zero-copy on payloads larger than 1024
+bytes. It accomplishes this by returning the physical addresses of
+pages to the virtio-pci device. At present, the translation is usually
+a bit shift.
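+
+To illustrate (a sketch of the lowmem case, not the exact code), that
+translation is essentially
+
+    pfn = (addr - PAGE_OFFSET) >> PAGE_SHIFT;
+
+which is only valid for addresses inside the kernel's linear mapping.
+vmalloc() areas are instead mapped through page tables, so the same
+arithmetic produces a bogus page frame number for them.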
+
+That approach produces an invalid page address when we read or write
+vmalloc buffers, such as those used for Linux kernel modules. Any
+attempt to load a Linux kernel module from 9p-virtio produces the
+following stack trace.
+
+[<ffffffff814878ce>] p9_virtio_zc_request+0x45e/0x510
+[<ffffffff814814ed>] p9_client_zc_rpc.constprop.16+0xfd/0x4f0
+[<ffffffff814839dd>] p9_client_read+0x15d/0x240
+[<ffffffff811c8440>] v9fs_fid_readn+0x50/0xa0
+[<ffffffff811c84a0>] v9fs_file_readn+0x10/0x20
+[<ffffffff811c84e7>] v9fs_file_read+0x37/0x70
+[<ffffffff8114e3fb>] vfs_read+0x9b/0x160
+[<ffffffff81153571>] kernel_read+0x41/0x60
+[<ffffffff810c83ab>] copy_module_from_fd.isra.34+0xfb/0x180
+
+Subsequently, QEMU will die printing:
+
+qemu-system-x86_64: virtio: trying to map MMIO memory
+
+This patch enables 9p-virtio to correctly handle this case. This not
+only enables us to load Linux kernel modules off virtfs, but also
+enables ZFS file-based vdevs on virtfs to be used without killing QEMU.
+
+Special thanks to both Avi Kivity and Alexander Graf for their
+interpretation of QEMU backtraces. Without their guidance, tracking down
+this bug would have taken much longer. Also, special thanks to Linus
+Torvalds for his insightful explanation of why this should use
+is_vmalloc_addr() instead of is_vmalloc_or_module_addr():
+
+https://lkml.org/lkml/2014/2/8/272
+
+Signed-off-by: Richard Yao <ryao@gentoo.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/9p/trans_virtio.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/9p/trans_virtio.c
++++ b/net/9p/trans_virtio.c
+@@ -340,7 +340,10 @@ static int p9_get_mapped_pages(struct vi
+ int count = nr_pages;
+ while (nr_pages) {
+ s = rest_of_page(data);
+- pages[index++] = kmap_to_page(data);
++ if (is_vmalloc_addr(data))
++ pages[index++] = vmalloc_to_page(data);
++ else
++ pages[index++] = kmap_to_page(data);
+ data += s;
+ nr_pages--;
+ }
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Antonio Quartulli <antonio@meshcoding.com>
+Date: Sat, 15 Feb 2014 02:17:20 +0100
+Subject: batman-adv: avoid double free when orig_node initialization fails
+
+From: Antonio Quartulli <antonio@meshcoding.com>
+
+[ Upstream commit a5a5cb8cab526af2f6cbe9715f8ca843192f0d81 ]
+
+In the failure path of the orig_node initialization routine
+the orig_node->bat_iv.bcast_own field is free'd twice: first
+in batadv_iv_ogm_orig_get() and then later in
+batadv_orig_node_free_rcu().
+
+Fix it by removing the kfree in batadv_iv_ogm_orig_get().
+
+Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/bat_iv_ogm.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/net/batman-adv/bat_iv_ogm.c
++++ b/net/batman-adv/bat_iv_ogm.c
+@@ -243,18 +243,16 @@ batadv_iv_ogm_orig_get(struct batadv_pri
+ size = bat_priv->num_ifaces * sizeof(uint8_t);
+ orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC);
+ if (!orig_node->bat_iv.bcast_own_sum)
+- goto free_bcast_own;
++ goto free_orig_node;
+
+ hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
+ batadv_choose_orig, orig_node,
+ &orig_node->hash_entry);
+ if (hash_added != 0)
+- goto free_bcast_own;
++ goto free_orig_node;
+
+ return orig_node;
+
+-free_bcast_own:
+- kfree(orig_node->bat_iv.bcast_own);
+ free_orig_node:
+ /* free twice, as batadv_orig_node_new sets refcount to 2 */
+ batadv_orig_node_free_ref(orig_node);
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Antonio Quartulli <antonio@open-mesh.com>
+Date: Wed, 29 Jan 2014 11:25:12 +0100
+Subject: batman-adv: avoid potential race condition when adding a new neighbour
+
+From: Antonio Quartulli <antonio@open-mesh.com>
+
+[ Upstream commit 08bf0ed29c7ded45c477d08618220dd200c3524a ]
+
+When adding a new neighbour it is important to atomically
+perform the following:
+- check if the neighbour already exists
+- append the neighbour to the proper list
+
+If the two operations are not performed in an atomic context
+it is possible that two concurrent insertions add the same
+neighbour twice.
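+
+A sketch of the required pattern (simplified from the diff below):
+
+    spin_lock_bh(&orig_node->neigh_list_lock);
+    tmp = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr);
+    if (!tmp)       /* not present yet: publish the new entry */
+        hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
+    else            /* lost the race: drop ours, reuse the winner */
+        neigh_node = tmp;
+    spin_unlock_bh(&orig_node->neigh_list_lock);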
+
+Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/bat_iv_ogm.c | 22 ++++++++++++++++------
+ net/batman-adv/originator.c | 36 ++++++++++++++++++++++++++++++++++++
+ net/batman-adv/originator.h | 4 ++++
+ 3 files changed, 56 insertions(+), 6 deletions(-)
+
+--- a/net/batman-adv/bat_iv_ogm.c
++++ b/net/batman-adv/bat_iv_ogm.c
+@@ -268,7 +268,7 @@ batadv_iv_ogm_neigh_new(struct batadv_ha
+ struct batadv_orig_node *orig_neigh)
+ {
+ struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+- struct batadv_neigh_node *neigh_node;
++ struct batadv_neigh_node *neigh_node, *tmp_neigh_node;
+
+ neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node);
+ if (!neigh_node)
+@@ -276,14 +276,24 @@ batadv_iv_ogm_neigh_new(struct batadv_ha
+
+ spin_lock_init(&neigh_node->bat_iv.lq_update_lock);
+
+- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+- "Creating new neighbor %pM for orig_node %pM on interface %s\n",
+- neigh_addr, orig_node->orig, hard_iface->net_dev->name);
+-
+ spin_lock_bh(&orig_node->neigh_list_lock);
+- hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
++ tmp_neigh_node = batadv_neigh_node_get(orig_node, hard_iface,
++ neigh_addr);
++ if (!tmp_neigh_node) {
++ hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
++ } else {
++ kfree(neigh_node);
++ batadv_hardif_free_ref(hard_iface);
++ neigh_node = tmp_neigh_node;
++ }
+ spin_unlock_bh(&orig_node->neigh_list_lock);
+
++ if (!tmp_neigh_node)
++ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
++ "Creating new neighbor %pM for orig_node %pM on interface %s\n",
++ neigh_addr, orig_node->orig,
++ hard_iface->net_dev->name);
++
+ out:
+ return neigh_node;
+ }
+--- a/net/batman-adv/originator.c
++++ b/net/batman-adv/originator.c
+@@ -511,6 +511,42 @@ void batadv_purge_orig_ref(struct batadv
+ _batadv_purge_orig(bat_priv);
+ }
+
++/**
++ * batadv_neigh_node_get - retrieve a neighbour from the list
++ * @orig_node: originator which the neighbour belongs to
++ * @hard_iface: the interface where this neighbour is connected to
++ * @addr: the address of the neighbour
++ *
++ * Looks for and possibly returns a neighbour belonging to this originator list
++ * which is connected through the provided hard interface.
++ * Returns NULL if the neighbour is not found.
++ */
++struct batadv_neigh_node *
++batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
++ const struct batadv_hard_iface *hard_iface,
++ const uint8_t *addr)
++{
++ struct batadv_neigh_node *tmp_neigh_node, *res = NULL;
++
++ rcu_read_lock();
++ hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) {
++ if (!batadv_compare_eth(tmp_neigh_node->addr, addr))
++ continue;
++
++ if (tmp_neigh_node->if_incoming != hard_iface)
++ continue;
++
++ if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
++ continue;
++
++ res = tmp_neigh_node;
++ break;
++ }
++ rcu_read_unlock();
++
++ return res;
++}
++
+ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
+ {
+ struct net_device *net_dev = (struct net_device *)seq->private;
+--- a/net/batman-adv/originator.h
++++ b/net/batman-adv/originator.h
+@@ -31,6 +31,10 @@ void batadv_orig_node_free_ref_now(struc
+ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
+ const uint8_t *addr);
+ struct batadv_neigh_node *
++batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
++ const struct batadv_hard_iface *hard_iface,
++ const uint8_t *addr);
++struct batadv_neigh_node *
+ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
+ const uint8_t *neigh_addr,
+ struct batadv_orig_node *orig_node);
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Antonio Quartulli <antonio@meshcoding.com>
+Date: Sat, 15 Feb 2014 21:50:37 +0100
+Subject: batman-adv: fix potential kernel paging error for unicast transmissions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Antonio Quartulli <antonio@meshcoding.com>
+
+[ Upstream commit 70b271a78beba787155d6696aacd7c4d4a251c50 ]
+
+batadv_send_skb_prepare_unicast(_4addr) might reallocate the
+skb's data. If it does, our ethhdr pointer is no longer valid
+in batadv_send_skb_unicast(), resulting in a kernel paging
+error.
+
+Fix this by refetching the ethhdr pointer after the
+potential reallocation.
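+
+A sketch of the hazard (simplified from the diff below):
+
+    ethhdr = (struct ethhdr *)skb->data;
+    /* may reallocate and move skb->data: */
+    batadv_send_skb_prepare_unicast(skb, orig_node);
+    /* ethhdr is now stale and must be recomputed from skb->data */
+    ethhdr = (struct ethhdr *)(skb->data + hdr_size);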
+
+Signed-off-by: Linus Lüssing <linus.luessing@web.de>
+Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/send.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/batman-adv/send.c
++++ b/net/batman-adv/send.c
+@@ -256,9 +256,9 @@ static int batadv_send_skb_unicast(struc
+ struct batadv_orig_node *orig_node,
+ unsigned short vid)
+ {
+- struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
++ struct ethhdr *ethhdr;
+ struct batadv_unicast_packet *unicast_packet;
+- int ret = NET_XMIT_DROP;
++ int ret = NET_XMIT_DROP, hdr_size;
+
+ if (!orig_node)
+ goto out;
+@@ -267,12 +267,16 @@ static int batadv_send_skb_unicast(struc
+ case BATADV_UNICAST:
+ if (!batadv_send_skb_prepare_unicast(skb, orig_node))
+ goto out;
++
++ hdr_size = sizeof(*unicast_packet);
+ break;
+ case BATADV_UNICAST_4ADDR:
+ if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb,
+ orig_node,
+ packet_subtype))
+ goto out;
++
++ hdr_size = sizeof(struct batadv_unicast_4addr_packet);
+ break;
+ default:
+ /* this function supports UNICAST and UNICAST_4ADDR only. It
+@@ -281,6 +285,7 @@ static int batadv_send_skb_unicast(struc
+ goto out;
+ }
+
++ ethhdr = (struct ethhdr *)(skb->data + hdr_size);
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
+
+ /* inform the destination node that we are still missing a correct route
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Simon Wunderlich <sw@simonwunderlich.de>
+Date: Sat, 8 Feb 2014 16:45:06 +0100
+Subject: batman-adv: fix potential orig_node reference leak
+
+From: Simon Wunderlich <sw@simonwunderlich.de>
+
+[ Upstream commit b2262df7fcf2c395eca564df83238e931d88d7bf ]
+
+Since batadv_orig_node_new() sets the refcount to two, assuming that
+the calling function will use a reference for putting the orig_node into
+a hash or similar, both references must be freed if initialization of
+the orig_node fails. Otherwise that object may be leaked in that error
+case.
+
+Reported-by: Antonio Quartulli <antonio@meshcoding.com>
+Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/bat_iv_ogm.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/batman-adv/bat_iv_ogm.c
++++ b/net/batman-adv/bat_iv_ogm.c
+@@ -256,6 +256,8 @@ batadv_iv_ogm_orig_get(struct batadv_pri
+ free_bcast_own:
+ kfree(orig_node->bat_iv.bcast_own);
+ free_orig_node:
++ /* free twice, as batadv_orig_node_new sets refcount to 2 */
++ batadv_orig_node_free_ref(orig_node);
+ batadv_orig_node_free_ref(orig_node);
+
+ return NULL;
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Antonio Quartulli <antonio@meshcoding.com>
+Date: Tue, 21 Jan 2014 11:22:05 +0100
+Subject: batman-adv: fix soft-interface MTU computation
+
+From: Antonio Quartulli <antonio@meshcoding.com>
+
+[ Upstream commit 930cd6e46eadce8b8ed2a232ee536e5fd286c152 ]
+
+The current MTU computation always returns a value
+smaller than 1500 bytes even if the real interfaces
+have an MTU large enough to compensate for the
+batman-adv overhead.
+
+Fix the computation by properly returning the highest
+admitted value.
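+
+A hypothetical worked example (the overhead value is made up for
+illustration): with a hard-interface MTU of 1532 and a batman-adv
+header overhead of 32 bytes, the soft interface can carry
+1532 - 32 = 1500 bytes, i.e. a full ETH_DATA_LEN payload. The old code
+started from min_mtu = ETH_DATA_LEN and subtracted the overhead
+afterwards, so it could never report more than 1500 - 32 = 1468.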
+
+Introduced by a19d3d85e1b854e4a483a55d740a42458085560d
+("batman-adv: limit local translation table max size")
+
+Reported-by: Russell Senior <russell@personaltelco.net>
+Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/hard-interface.c | 22 ++++++++++++++--------
+ 1 file changed, 14 insertions(+), 8 deletions(-)
+
+--- a/net/batman-adv/hard-interface.c
++++ b/net/batman-adv/hard-interface.c
+@@ -244,7 +244,7 @@ int batadv_hardif_min_mtu(struct net_dev
+ {
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ const struct batadv_hard_iface *hard_iface;
+- int min_mtu = ETH_DATA_LEN;
++ int min_mtu = INT_MAX;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+@@ -259,8 +259,6 @@ int batadv_hardif_min_mtu(struct net_dev
+ }
+ rcu_read_unlock();
+
+- atomic_set(&bat_priv->packet_size_max, min_mtu);
+-
+ if (atomic_read(&bat_priv->fragmentation) == 0)
+ goto out;
+
+@@ -271,13 +269,21 @@ int batadv_hardif_min_mtu(struct net_dev
+ min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE);
+ min_mtu -= sizeof(struct batadv_frag_packet);
+ min_mtu *= BATADV_FRAG_MAX_FRAGMENTS;
+- atomic_set(&bat_priv->packet_size_max, min_mtu);
+-
+- /* with fragmentation enabled we can fragment external packets easily */
+- min_mtu = min_t(int, min_mtu, ETH_DATA_LEN);
+
+ out:
+- return min_mtu - batadv_max_header_len();
++ /* report to the other components the maximum amount of bytes that
++ * batman-adv can send over the wire (without considering the payload
++ * overhead). For example, this value is used by TT to compute the
++ * maximum local table size
++ */
++ atomic_set(&bat_priv->packet_size_max, min_mtu);
++
++ /* the real soft-interface MTU is computed by removing the payload
++ * overhead from the maximum amount of bytes that was just computed.
++ *
++ * However batman-adv does not support MTUs bigger than ETH_DATA_LEN
++ */
++ return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN);
+ }
+
+ /* adjusts the MTU if a new interface with a smaller MTU appeared. */
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Antonio Quartulli <antonio@open-mesh.com>
+Date: Tue, 11 Feb 2014 17:05:06 +0100
+Subject: batman-adv: fix TT CRC computation by ensuring byte order
+
+From: Antonio Quartulli <antonio@open-mesh.com>
+
+[ Upstream commit a30e22ca8464c2dc573e0144a972221c2f06c2cd ]
+
+When computing the CRC on a 2-byte variable the order of
+the bytes obviously alters the final result. This means
+that computing the CRC over the same value on two archs
+having different endianness leads to different numbers.
+
+The global and local translation table CRC computation
+routine makes this mistake while processing the clients'
+VIDs. The result is a continuous CRC mismatch between
+nodes having different endianness.
+
+Fix this by converting the VID to Network Order before
+processing it. This guarantees that every node uses the same
+byte order.
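+
+For example (illustration only): VID 5 is stored as the bytes 05 00 on
+a little-endian machine but as 00 05 on a big-endian one, so crc32c()
+over the raw bytes yields different checksums. After htons() both
+machines feed 00 05 into crc32c() and agree on the result.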
+
+Introduced by 7ea7b4a142758deaf46c1af0ca9ceca6dd55138b
+("batman-adv: make the TT CRC logic VLAN specific")
+
+Reported-by: Russell Senior <russell@personaltelco.net>
+Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
+Tested-by: Russell Senior <russell@personaltelco.net>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/translation-table.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+--- a/net/batman-adv/translation-table.c
++++ b/net/batman-adv/translation-table.c
+@@ -1961,6 +1961,7 @@ static uint32_t batadv_tt_global_crc(str
+ struct hlist_head *head;
+ uint32_t i, crc_tmp, crc = 0;
+ uint8_t flags;
++ __be16 tmp_vid;
+
+ for (i = 0; i < hash->size; i++) {
+ head = &hash->table[i];
+@@ -1997,8 +1998,11 @@ static uint32_t batadv_tt_global_crc(str
+ orig_node))
+ continue;
+
+- crc_tmp = crc32c(0, &tt_common->vid,
+- sizeof(tt_common->vid));
++ /* use network order to read the VID: this ensures that
++ * every node reads the bytes in the same order.
++ */
++ tmp_vid = htons(tt_common->vid);
++ crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid));
+
+ /* compute the CRC on flags that have to be kept in sync
+ * among nodes
+@@ -2032,6 +2036,7 @@ static uint32_t batadv_tt_local_crc(stru
+ struct hlist_head *head;
+ uint32_t i, crc_tmp, crc = 0;
+ uint8_t flags;
++ __be16 tmp_vid;
+
+ for (i = 0; i < hash->size; i++) {
+ head = &hash->table[i];
+@@ -2050,8 +2055,11 @@ static uint32_t batadv_tt_local_crc(stru
+ if (tt_common->flags & BATADV_TT_CLIENT_NEW)
+ continue;
+
+- crc_tmp = crc32c(0, &tt_common->vid,
+- sizeof(tt_common->vid));
++ /* use network order to read the VID: this ensures that
++ * every node reads the bytes in the same order.
++ */
++ tmp_vid = htons(tt_common->vid);
++ crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid));
+
+ /* compute the CRC on flags that have to be kept in sync
+ * among nodes
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Antonio Quartulli <antonio@meshcoding.com>
+Date: Mon, 27 Jan 2014 12:23:28 +0100
+Subject: batman-adv: fix TT-TVLV parsing on OGM reception
+
+From: Antonio Quartulli <antonio@meshcoding.com>
+
+[ Upstream commit e889241f45f9cecbc84a6ffed577083ab52e62ee ]
+
+When accessing a TT-TVLV container in the OGM RX path
+the variable pointing to the list of changes to apply is
+altered by mistake.
+
+This makes the TT component read data at the wrong position
+in the OGM packet buffer.
+
+Fix it by removing the bogus pointer alteration.
+
+Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/translation-table.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/batman-adv/translation-table.c
++++ b/net/batman-adv/translation-table.c
+@@ -3204,7 +3204,6 @@ static void batadv_tt_update_orig(struct
+
+ spin_lock_bh(&orig_node->tt_lock);
+
+- tt_change = (struct batadv_tvlv_tt_change *)tt_buff;
+ batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes,
+ ttvn, tt_change);
+
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Antonio Quartulli <antonio@open-mesh.com>
+Date: Tue, 11 Feb 2014 17:05:07 +0100
+Subject: batman-adv: free skb on TVLV parsing success
+
+From: Antonio Quartulli <antonio@open-mesh.com>
+
+[ Upstream commit 05c3c8a636aa9ee35ce13f65afc5b665615cc786 ]
+
+When the TVLV parsing routine succeeds, the skb is left
+untouched, thus leading to a memory leak.
+
+Fix this by consuming the skb in case of success.
+
+Introduced by ef26157747d42254453f6b3ac2bd8bd3c53339c3
+("batman-adv: tvlv - basic infrastructure")
+
+Reported-by: Russell Senior <russell@personaltelco.net>
+Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
+Tested-by: Russell Senior <russell@personaltelco.net>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/routing.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/batman-adv/routing.c
++++ b/net/batman-adv/routing.c
+@@ -1063,6 +1063,8 @@ int batadv_recv_unicast_tvlv(struct sk_b
+
+ if (ret != NET_RX_SUCCESS)
+ ret = batadv_route_unicast_packet(skb, recv_if);
++ else
++ consume_skb(skb);
+
+ return ret;
+ }
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Antonio Quartulli <antonio@meshcoding.com>
+Date: Thu, 30 Jan 2014 00:12:24 +0100
+Subject: batman-adv: properly check pskb_may_pull return value
+
+From: Antonio Quartulli <antonio@meshcoding.com>
+
+[ Upstream commit f1791425cf0bcda43ab9a9a37df1ad3ccb1f6654 ]
+
+pskb_may_pull() returns 1 on success and 0 in case of failure,
+therefore checking whether the return value is negative does
+not make sense at all.
+
+As it stands, if the function fails we will probably read beyond the
+current skb data buffer. Fix this by doing the proper check.
+
+Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/routing.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/batman-adv/routing.c
++++ b/net/batman-adv/routing.c
+@@ -833,7 +833,7 @@ static int batadv_check_unicast_ttvn(str
+ int is_old_ttvn;
+
+ /* check if there is enough data before accessing it */
+- if (pskb_may_pull(skb, hdr_len + ETH_HLEN) < 0)
++ if (!pskb_may_pull(skb, hdr_len + ETH_HLEN))
+ return 0;
+
+ /* create a copy of the skb (in case of for re-routing) to modify it. */
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Antonio Quartulli <antonio@meshcoding.com>
+Date: Tue, 28 Jan 2014 02:06:47 +0100
+Subject: batman-adv: release vlan object after checking the CRC
+
+From: Antonio Quartulli <antonio@meshcoding.com>
+
+[ Upstream commit 91c2b1a9f680ff105369d49abc7e19ca7efb33e1 ]
+
+There is a refcounter imbalance in the CRC checking routine
+invoked on OGM reception. A vlan object is retrieved (thus
+its refcounter is increased by one) but it is never properly
+released. This leads to a memleak because the vlan object
+will never be free'd.
+
+Fix this by releasing the vlan object after having read the
+CRC.
+
+Reported-by: Russell Senior <russell@personaltelco.net>
+Reported-by: Daniel <daniel@makrotopia.org>
+Reported-by: cmsv <cmsv@wirelesspt.net>
+Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/translation-table.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/batman-adv/translation-table.c
++++ b/net/batman-adv/translation-table.c
+@@ -2248,6 +2248,7 @@ static bool batadv_tt_global_check_crc(s
+ {
+ struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp;
+ struct batadv_orig_node_vlan *vlan;
++ uint32_t crc;
+ int i;
+
+ /* check if each received CRC matches the locally stored one */
+@@ -2267,7 +2268,10 @@ static bool batadv_tt_global_check_crc(s
+ if (!vlan)
+ return false;
+
+- if (vlan->tt.crc != ntohl(tt_vlan_tmp->crc))
++ crc = vlan->tt.crc;
++ batadv_orig_node_vlan_free_ref(vlan);
++
++ if (crc != ntohl(tt_vlan_tmp->crc))
+ return false;
+ }
+
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Jiri Bohac <jiri@boha.cz>
+Date: Fri, 14 Feb 2014 18:13:50 +0100
+Subject: bonding: 802.3ad: make aggregator_identifier bond-private
+
+From: Jiri Bohac <jiri@boha.cz>
+
+[ Upstream commit 163c8ff30dbe473abfbb24a7eac5536c87f3baa9 ]
+
+aggregator_identifier is used to assign unique aggregator identifiers
+to aggregators of a bond during device enslaving.
+
+aggregator_identifier is currently a global variable that is zeroed in
+bond_3ad_initialize().
+
+This sequence will lead to duplicate aggregator identifiers for eth1 and eth3:
+
+create bond0
+change bond0 mode to 802.3ad
+enslave eth0 to bond0 //eth0 gets agg id 1
+enslave eth1 to bond0 //eth1 gets agg id 2
+create bond1
+change bond1 mode to 802.3ad
+enslave eth2 to bond1 //aggregator_identifier is reset to 0
+ //eth2 gets agg id 1
+enslave eth3 to bond0 //eth3 gets agg id 2
+
+Fix this by making aggregator_identifier private to the bond.
+
+Signed-off-by: Jiri Bohac <jbohac@suse.cz>
+Acked-by: Veaceslav Falico <vfalico@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_3ad.c | 6 ++----
+ drivers/net/bonding/bond_3ad.h | 1 +
+ 2 files changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -1806,8 +1806,6 @@ void bond_3ad_initiate_agg_selection(str
+ BOND_AD_INFO(bond).agg_select_timer = timeout;
+ }
+
+-static u16 aggregator_identifier;
+-
+ /**
+ * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures
+ * @bond: bonding struct to work on
+@@ -1821,7 +1819,7 @@ void bond_3ad_initialize(struct bonding
+ if (MAC_ADDRESS_COMPARE(&(BOND_AD_INFO(bond).system.sys_mac_addr),
+ bond->dev->dev_addr)) {
+
+- aggregator_identifier = 0;
++ BOND_AD_INFO(bond).aggregator_identifier = 0;
+
+ BOND_AD_INFO(bond).system.sys_priority = 0xFFFF;
+ BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr);
+@@ -1892,7 +1890,7 @@ int bond_3ad_bind_slave(struct slave *sl
+ ad_initialize_agg(aggregator);
+
+ aggregator->aggregator_mac_address = *((struct mac_addr *)bond->dev->dev_addr);
+- aggregator->aggregator_identifier = (++aggregator_identifier);
++ aggregator->aggregator_identifier = ++BOND_AD_INFO(bond).aggregator_identifier;
+ aggregator->slave = slave;
+ aggregator->is_active = 0;
+ aggregator->num_of_ports = 0;
+--- a/drivers/net/bonding/bond_3ad.h
++++ b/drivers/net/bonding/bond_3ad.h
+@@ -253,6 +253,7 @@ struct ad_system {
+ struct ad_bond_info {
+ struct ad_system system; /* 802.3ad system structure */
+ u32 agg_select_timer; // Timer to select aggregator after all adapter's hand shakes
++ u16 aggregator_identifier;
+ };
+
+ struct ad_slave_info {
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Cong Wang <cwang@twopensource.com>
+Date: Thu, 6 Feb 2014 15:00:52 -0800
+Subject: bridge: fix netconsole setup over bridge
+
+From: Cong Wang <cwang@twopensource.com>
+
+[ Upstream commit dbe173079ab58a444e12dbebe96f5aec1e0bed1a ]
+
+Commit 93d8bf9fb8f3 ("bridge: cleanup netpoll code") introduced
+a check in br_netpoll_enable(), but this check is incorrect for
+br_netpoll_setup(). This patch moves the code after the check
+into __br_netpoll_enable() and calls it in br_netpoll_setup().
+For br_add_if(), the check is still needed.
+
+Fixes: 93d8bf9fb8f3 ("bridge: cleanup netpoll code")
+Cc: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Cc: Stephen Hemminger <stephen@networkplumber.org>
+Cc: David S. Miller <davem@davemloft.net>
+Signed-off-by: Cong Wang <cwang@twopensource.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Tested-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_device.c | 51 ++++++++++++++++++++++++++-----------------------
+ 1 file changed, 28 insertions(+), 23 deletions(-)
+
+--- a/net/bridge/br_device.c
++++ b/net/bridge/br_device.c
+@@ -226,6 +226,33 @@ static void br_netpoll_cleanup(struct ne
+ br_netpoll_disable(p);
+ }
+
++static int __br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
++{
++ struct netpoll *np;
++ int err;
++
++ np = kzalloc(sizeof(*p->np), gfp);
++ if (!np)
++ return -ENOMEM;
++
++ err = __netpoll_setup(np, p->dev, gfp);
++ if (err) {
++ kfree(np);
++ return err;
++ }
++
++ p->np = np;
++ return err;
++}
++
++int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
++{
++ if (!p->br->dev->npinfo)
++ return 0;
++
++ return __br_netpoll_enable(p, gfp);
++}
++
+ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
+ gfp_t gfp)
+ {
+@@ -236,7 +263,7 @@ static int br_netpoll_setup(struct net_d
+ list_for_each_entry(p, &br->port_list, list) {
+ if (!p->dev)
+ continue;
+- err = br_netpoll_enable(p, gfp);
++ err = __br_netpoll_enable(p, gfp);
+ if (err)
+ goto fail;
+ }
+@@ -249,28 +276,6 @@ fail:
+ goto out;
+ }
+
+-int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
+-{
+- struct netpoll *np;
+- int err;
+-
+- if (!p->br->dev->npinfo)
+- return 0;
+-
+- np = kzalloc(sizeof(*p->np), gfp);
+- if (!np)
+- return -ENOMEM;
+-
+- err = __netpoll_setup(np, p->dev, gfp);
+- if (err) {
+- kfree(np);
+- return err;
+- }
+-
+- p->np = np;
+- return err;
+-}
+-
+ void br_netpoll_disable(struct net_bridge_port *p)
+ {
+ struct netpoll *np = p->np;
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Oliver Hartkopp <socketcan@hartkopp.net>
+Date: Thu, 30 Jan 2014 10:11:28 +0100
+Subject: can: add destructor for self generated skbs
+
+From: Oliver Hartkopp <socketcan@hartkopp.net>
+
+[ Upstream commit 0ae89beb283a0db5980d1d4781c7d7be2f2810d6 ]
+
+Self-generated skbuffs in net/can/bcm.c set a skb->sk reference but no
+explicit destructor, a combination that has been rejected since Linux
+3.11 with commit 376c7311bdb6 ("net: add a temporary sanity check in
+skb_orphan()").
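+
+Roughly, the sanity check means skb_orphan() behaves like this (a
+sketch, not the verbatim code):
+
+    static inline void skb_orphan(struct sk_buff *skb)
+    {
+        if (skb->destructor) {
+            skb->destructor(skb);
+            skb->destructor = NULL;
+            skb->sk = NULL;
+        } else {
+            BUG_ON(skb->sk);    /* sk reference without a destructor */
+        }
+    }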
+
+This patch adds some helper functions to make sure that a destructor is
+properly defined when a sock reference is assigned to a CAN related skb.
+To create an unshared skb owned by the original sock, a common helper
+function has been introduced to replace the open-coded creation of CAN
+echo skbs.
+
+Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
+Tested-by: Andre Naujoks <nautsch2@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/can/dev.c | 15 +++------------
+ drivers/net/can/janz-ican3.c | 18 ++++--------------
+ drivers/net/can/vcan.c | 9 ++++-----
+ include/linux/can/skb.h | 38 ++++++++++++++++++++++++++++++++++++++
+ net/can/af_can.c | 3 ++-
+ net/can/bcm.c | 4 ++--
+ 6 files changed, 53 insertions(+), 34 deletions(-)
+
+--- a/drivers/net/can/dev.c
++++ b/drivers/net/can/dev.c
+@@ -324,19 +324,10 @@ void can_put_echo_skb(struct sk_buff *sk
+ }
+
+ if (!priv->echo_skb[idx]) {
+- struct sock *srcsk = skb->sk;
+
+- if (atomic_read(&skb->users) != 1) {
+- struct sk_buff *old_skb = skb;
+-
+- skb = skb_clone(old_skb, GFP_ATOMIC);
+- kfree_skb(old_skb);
+- if (!skb)
+- return;
+- } else
+- skb_orphan(skb);
+-
+- skb->sk = srcsk;
++ skb = can_create_echo_skb(skb);
++ if (!skb)
++ return;
+
+ /* make settings for echo to reduce code in irq context */
+ skb->protocol = htons(ETH_P_CAN);
+--- a/drivers/net/can/janz-ican3.c
++++ b/drivers/net/can/janz-ican3.c
+@@ -19,6 +19,7 @@
+ #include <linux/netdevice.h>
+ #include <linux/can.h>
+ #include <linux/can/dev.h>
++#include <linux/can/skb.h>
+ #include <linux/can/error.h>
+
+ #include <linux/mfd/janz.h>
+@@ -1134,20 +1135,9 @@ static void ican3_handle_message(struct
+ */
+ static void ican3_put_echo_skb(struct ican3_dev *mod, struct sk_buff *skb)
+ {
+- struct sock *srcsk = skb->sk;
+-
+- if (atomic_read(&skb->users) != 1) {
+- struct sk_buff *old_skb = skb;
+-
+- skb = skb_clone(old_skb, GFP_ATOMIC);
+- kfree_skb(old_skb);
+- if (!skb)
+- return;
+- } else {
+- skb_orphan(skb);
+- }
+-
+- skb->sk = srcsk;
++ skb = can_create_echo_skb(skb);
++ if (!skb)
++ return;
+
+ /* save this skb for tx interrupt echo handling */
+ skb_queue_tail(&mod->echoq, skb);
+--- a/drivers/net/can/vcan.c
++++ b/drivers/net/can/vcan.c
+@@ -46,6 +46,7 @@
+ #include <linux/if_ether.h>
+ #include <linux/can.h>
+ #include <linux/can/dev.h>
++#include <linux/can/skb.h>
+ #include <linux/slab.h>
+ #include <net/rtnetlink.h>
+
+@@ -109,25 +110,23 @@ static netdev_tx_t vcan_tx(struct sk_buf
+ stats->rx_packets++;
+ stats->rx_bytes += cfd->len;
+ }
+- kfree_skb(skb);
++ consume_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* perform standard echo handling for CAN network interfaces */
+
+ if (loop) {
+- struct sock *srcsk = skb->sk;
+
+- skb = skb_share_check(skb, GFP_ATOMIC);
++ skb = can_create_echo_skb(skb);
+ if (!skb)
+ return NETDEV_TX_OK;
+
+ /* receive with packet counting */
+- skb->sk = srcsk;
+ vcan_rx(skb, dev);
+ } else {
+ /* no looped packets => no counting */
+- kfree_skb(skb);
++ consume_skb(skb);
+ }
+ return NETDEV_TX_OK;
+ }
+--- a/include/linux/can/skb.h
++++ b/include/linux/can/skb.h
+@@ -11,7 +11,9 @@
+ #define CAN_SKB_H
+
+ #include <linux/types.h>
++#include <linux/skbuff.h>
+ #include <linux/can.h>
++#include <net/sock.h>
+
+ /*
+ * The struct can_skb_priv is used to transport additional information along
+@@ -42,4 +44,40 @@ static inline void can_skb_reserve(struc
+ skb_reserve(skb, sizeof(struct can_skb_priv));
+ }
+
++static inline void can_skb_destructor(struct sk_buff *skb)
++{
++ sock_put(skb->sk);
++}
++
++static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk)
++{
++ if (sk) {
++ sock_hold(sk);
++ skb->destructor = can_skb_destructor;
++ skb->sk = sk;
++ }
++}
++
++/*
++ * returns an unshared skb owned by the original sock to be echo'ed back
++ */
++static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb)
++{
++ if (skb_shared(skb)) {
++ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
++
++ if (likely(nskb)) {
++ can_skb_set_owner(nskb, skb->sk);
++ consume_skb(skb);
++ return nskb;
++ } else {
++ kfree_skb(skb);
++ return NULL;
++ }
++ }
++
++ /* we can assume to have an unshared skb with proper owner */
++ return skb;
++}
++
+ #endif /* CAN_SKB_H */
+--- a/net/can/af_can.c
++++ b/net/can/af_can.c
+@@ -57,6 +57,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/can.h>
+ #include <linux/can/core.h>
++#include <linux/can/skb.h>
+ #include <linux/ratelimit.h>
+ #include <net/net_namespace.h>
+ #include <net/sock.h>
+@@ -290,7 +291,7 @@ int can_send(struct sk_buff *skb, int lo
+ return -ENOMEM;
+ }
+
+- newskb->sk = skb->sk;
++ can_skb_set_owner(newskb, skb->sk);
+ newskb->ip_summed = CHECKSUM_UNNECESSARY;
+ newskb->pkt_type = PACKET_BROADCAST;
+ }
+--- a/net/can/bcm.c
++++ b/net/can/bcm.c
+@@ -268,7 +268,7 @@ static void bcm_can_tx(struct bcm_op *op
+
+ /* send with loopback */
+ skb->dev = dev;
+- skb->sk = op->sk;
++ can_skb_set_owner(skb, op->sk);
+ can_send(skb, 1);
+
+ /* update statistics */
+@@ -1223,7 +1223,7 @@ static int bcm_tx_send(struct msghdr *ms
+
+ can_skb_prv(skb)->ifindex = dev->ifindex;
+ skb->dev = dev;
+- skb->sk = sk;
++ can_skb_set_owner(skb, sk);
+ err = can_send(skb, 1); /* send with loopback */
+ dev_put(dev);
+
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Mon, 17 Feb 2014 14:22:21 +0100
+Subject: gre: add link local route when local addr is any
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 08b44656c08c8c2f73cdac2a058be2880e3361f2 ]
+
+This bug was reported by Steinar H. Gunderson and was introduced by commit
+f7cb8886335d ("sit/gre6: don't try to add the same route two times").
+
+root@morgental:~# ip tunnel add foo mode gre remote 1.2.3.4 ttl 64
+root@morgental:~# ip link set foo up mtu 1468
+root@morgental:~# ip -6 route show dev foo
+fe80::/64 proto kernel metric 256
+
+but after the above commit, no such route shows up.
+
+There is no link local route because dev->dev_addr is 0 (because local ipv4
+address is 0), hence no link local address is configured.
+
+In this scenario, the link local address is added manually: 'ip -6 addr add
+fe80::1 dev foo', and because the prefix is /128, no link local route is added
+by the kernel.
+
+Even if the right thing to do is to add the link local address with a /64
+prefix, we need to restore the previous behavior to avoid breaking userspace.
+
+Reported-by: Steinar H. Gunderson <sesse@samfundet.no>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -2726,6 +2726,8 @@ static void addrconf_gre_config(struct n
+ ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
+ if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
+ addrconf_add_linklocal(idev, &addr);
++ else
++ addrconf_prefix_route(&addr, 64, dev, 0, 0);
+ }
+ #endif
+
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Wed, 12 Feb 2014 16:54:27 -0800
+Subject: hyperv: Fix the carrier status setting
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Upstream commit 891de74d693bb4fefe2efcc6432a4a9a9bee561e ]
+
+Without this patch, "cat /sys/class/net/ethN/operstate" shows
+"unknown", and "ethtool ethN" shows "Link detected: yes", when the VM
+boots up with or without a vNIC connected.
+
+This patch fixes the problem.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 53 ++++++++++++++++++++++++++++------------
+ 1 file changed, 38 insertions(+), 15 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -89,8 +89,12 @@ static int netvsc_open(struct net_device
+ {
+ struct net_device_context *net_device_ctx = netdev_priv(net);
+ struct hv_device *device_obj = net_device_ctx->device_ctx;
++ struct netvsc_device *nvdev;
++ struct rndis_device *rdev;
+ int ret = 0;
+
++ netif_carrier_off(net);
++
+ /* Open up the device */
+ ret = rndis_filter_open(device_obj);
+ if (ret != 0) {
+@@ -100,6 +104,11 @@ static int netvsc_open(struct net_device
+
+ netif_start_queue(net);
+
++ nvdev = hv_get_drvdata(device_obj);
++ rdev = nvdev->extension;
++ if (!rdev->link_state)
++ netif_carrier_on(net);
++
+ return ret;
+ }
+
+@@ -230,23 +239,24 @@ void netvsc_linkstatus_callback(struct h
+ struct net_device *net;
+ struct net_device_context *ndev_ctx;
+ struct netvsc_device *net_device;
++ struct rndis_device *rdev;
+
+ net_device = hv_get_drvdata(device_obj);
++ rdev = net_device->extension;
++
++ rdev->link_state = status != 1;
++
+ net = net_device->ndev;
+
+- if (!net) {
+- netdev_err(net, "got link status but net device "
+- "not initialized yet\n");
++ if (!net || net->reg_state != NETREG_REGISTERED)
+ return;
+- }
+
++ ndev_ctx = netdev_priv(net);
+ if (status == 1) {
+- netif_carrier_on(net);
+- ndev_ctx = netdev_priv(net);
+ schedule_delayed_work(&ndev_ctx->dwork, 0);
+ schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
+ } else {
+- netif_carrier_off(net);
++ schedule_delayed_work(&ndev_ctx->dwork, 0);
+ }
+ }
+
+@@ -389,17 +399,35 @@ static const struct net_device_ops devic
+ * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add
+ * another netif_notify_peers() into a delayed work, otherwise GARP packet
+ * will not be sent after quick migration, and cause network disconnection.
++ * Also, we update the carrier status here.
+ */
+-static void netvsc_send_garp(struct work_struct *w)
++static void netvsc_link_change(struct work_struct *w)
+ {
+ struct net_device_context *ndev_ctx;
+ struct net_device *net;
+ struct netvsc_device *net_device;
++ struct rndis_device *rdev;
++ bool notify;
++
++ rtnl_lock();
+
+ ndev_ctx = container_of(w, struct net_device_context, dwork.work);
+ net_device = hv_get_drvdata(ndev_ctx->device_ctx);
++ rdev = net_device->extension;
+ net = net_device->ndev;
+- netdev_notify_peers(net);
++
++ if (rdev->link_state) {
++ netif_carrier_off(net);
++ notify = false;
++ } else {
++ netif_carrier_on(net);
++ notify = true;
++ }
++
++ rtnl_unlock();
++
++ if (notify)
++ netdev_notify_peers(net);
+ }
+
+
+@@ -415,13 +443,10 @@ static int netvsc_probe(struct hv_device
+ if (!net)
+ return -ENOMEM;
+
+- /* Set initial state */
+- netif_carrier_off(net);
+-
+ net_device_ctx = netdev_priv(net);
+ net_device_ctx->device_ctx = dev;
+ hv_set_drvdata(dev, net);
+- INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp);
++ INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
+ INIT_WORK(&net_device_ctx->work, do_set_multicast);
+
+ net->netdev_ops = &device_ops;
+@@ -444,8 +469,6 @@ static int netvsc_probe(struct hv_device
+ }
+ memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
+
+- netif_carrier_on(net);
+-
+ ret = register_netdev(net);
+ if (ret != 0) {
+ pr_err("Unable to register netdev.\n");
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Duan Jiong <duanj.fnst@cn.fujitsu.com>
+Date: Mon, 17 Feb 2014 15:23:43 +0800
+Subject: ipv4: fix counter in_slow_tot
+
+From: Duan Jiong <duanj.fnst@cn.fujitsu.com>
+
+[ Upstream commit a6254864c08109c66a194612585afc0439005286 ]
+
+Since commit 89aef8921bf ("ipv4: Delete routing cache"), the counter
+in_slow_tot hasn't worked correctly.
+
+The counter in_slow_tot increases by one when fib_lookup() returns
+successfully in ip_route_input_slow(), but the dst struct may not actually be
+created and cached, so we should increase in_slow_tot only after the dst
+struct is created.
+
+Signed-off-by: Duan Jiong <duanj.fnst@cn.fujitsu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1600,6 +1600,7 @@ static int __mkroute_input(struct sk_buf
+ rth->rt_gateway = 0;
+ rth->rt_uses_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
++ RT_CACHE_STAT_INC(in_slow_tot);
+
+ rth->dst.input = ip_forward;
+ rth->dst.output = ip_output;
+@@ -1701,8 +1702,6 @@ static int ip_route_input_slow(struct sk
+ if (err != 0)
+ goto no_route;
+
+- RT_CACHE_STAT_INC(in_slow_tot);
+-
+ if (res.type == RTN_BROADCAST)
+ goto brd_input;
+
+@@ -1771,6 +1770,7 @@ local_input:
+ rth->rt_gateway = 0;
+ rth->rt_uses_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
++ RT_CACHE_STAT_INC(in_slow_tot);
+ if (res.type == RTN_UNREACHABLE) {
+ rth->dst.input= ip_error;
+ rth->dst.error= -err;
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Geert Uytterhoeven <geert@linux-m68k.org>
+Date: Wed, 5 Feb 2014 08:38:25 +0100
+Subject: ipv4: Fix runtime WARNING in rtmsg_ifa()
+
+From: Geert Uytterhoeven <geert@linux-m68k.org>
+
+[ Upstream commit 63b5f152eb4a5bb79b9caf7ec37b4201d12f6e66 ]
+
+On m68k/ARAnyM:
+
+WARNING: CPU: 0 PID: 407 at net/ipv4/devinet.c:1599 0x316a99()
+Modules linked in:
+CPU: 0 PID: 407 Comm: ifconfig Not tainted
+3.13.0-atari-09263-g0c71d68014d1 #1378
+Stack from 10c4fdf0:
+ 10c4fdf0 002ffabb 000243e8 00000000 008ced6c 00024416 00316a99 0000063f
+ 00316a99 00000009 00000000 002501b4 00316a99 0000063f c0a86117 00000080
+ c0a86117 00ad0c90 00250a5a 00000014 00ad0c90 00000000 00000000 00000001
+ 00b02dd0 00356594 00000000 00356594 c0a86117 eff6c9e4 008ced6c 00000002
+ 008ced60 0024f9b4 00250b52 00ad0c90 00000000 00000000 00252390 00ad0c90
+ eff6c9e4 0000004f 00000000 00000000 eff6c9e4 8000e25c eff6c9e4 80001020
+Call Trace: [<000243e8>] warn_slowpath_common+0x52/0x6c
+ [<00024416>] warn_slowpath_null+0x14/0x1a
+ [<002501b4>] rtmsg_ifa+0xdc/0xf0
+ [<00250a5a>] __inet_insert_ifa+0xd6/0x1c2
+ [<0024f9b4>] inet_abc_len+0x0/0x42
+ [<00250b52>] inet_insert_ifa+0xc/0x12
+ [<00252390>] devinet_ioctl+0x2ae/0x5d6
+
+Adding some debugging code reveals that net_fill_ifaddr() fails in
+
+ put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
+ preferred, valid))
+
+nla_put complains:
+
+ lib/nlattr.c:454: skb_tailroom(skb) = 12, nla_total_size(attrlen) = 20
+
+Apparently commit 5c766d642bcaffd0c2a5b354db2068515b3846cf ("ipv4:
+introduce address lifetime") forgot to take into account the addition of
+struct ifa_cacheinfo in inet_nlmsg_size(). Hence add it, as is already
+done for ipv6.
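+
+The arithmetic matches the warning above: struct ifa_cacheinfo carries
+four __u32 fields (16 bytes), so nla_total_size(16) adds a 4-byte
+attribute header for a total of 20 bytes, exactly the attribute size
+that the 12 bytes of remaining tailroom could not hold.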
+
+Suggested-by: Cong Wang <cwang@twopensource.com>
+Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Signed-off-by: Cong Wang <cwang@twopensource.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/devinet.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -1435,7 +1435,8 @@ static size_t inet_nlmsg_size(void)
+ + nla_total_size(4) /* IFA_ADDRESS */
+ + nla_total_size(4) /* IFA_LOCAL */
+ + nla_total_size(4) /* IFA_BROADCAST */
+- + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
++ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
++ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
+ }
+
+ static inline u32 cstamp_delta(unsigned long cstamp)
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 21 Feb 2014 20:46:38 +0100
+Subject: net: add and use skb_gso_transport_seglen()
+
+From: Florian Westphal <fw@strlen.de>
+
+commit de960aa9ab4decc3304959f69533eef64d05d8e8 upstream.
+
+This moves part of Eric Dumazet's skb_gso_seglen helper from tbf sched to
+the skbuff core so it may be reused by an upcoming ip forwarding path patch.
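+
+In sketch form, the helper computes the per-segment transport size as
+
+    seglen = tcp_hdrlen(skb) /* or sizeof(struct udphdr) for UDP */
+             + skb_shinfo(skb)->gso_size;
+
+so callers that need L2/L3 sizes add their own header lengths on top,
+as the sch_tbf conversion below does.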
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h | 1 +
+ net/core/skbuff.c | 25 +++++++++++++++++++++++++
+ net/sched/sch_tbf.c | 13 +++----------
+ 3 files changed, 29 insertions(+), 10 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2371,6 +2371,7 @@ void skb_copy_and_csum_dev(const struct
+ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
+ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
+ void skb_scrub_packet(struct sk_buff *skb, bool xnet);
++unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
+ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
+
+ struct skb_checksum_ops {
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -47,6 +47,8 @@
+ #include <linux/in.h>
+ #include <linux/inet.h>
+ #include <linux/slab.h>
++#include <linux/tcp.h>
++#include <linux/udp.h>
+ #include <linux/netdevice.h>
+ #ifdef CONFIG_NET_CLS_ACT
+ #include <net/pkt_sched.h>
+@@ -3562,3 +3564,26 @@ void skb_scrub_packet(struct sk_buff *sk
+ nf_reset_trace(skb);
+ }
+ EXPORT_SYMBOL_GPL(skb_scrub_packet);
++
++/**
++ * skb_gso_transport_seglen - Return length of individual segments of a gso packet
++ *
++ * @skb: GSO skb
++ *
++ * skb_gso_transport_seglen is used to determine the real size of the
++ * individual segments, including Layer4 headers (TCP/UDP).
++ *
++ * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
++ */
++unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
++{
++ const struct skb_shared_info *shinfo = skb_shinfo(skb);
++ unsigned int hdr_len;
++
++ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
++ hdr_len = tcp_hdrlen(skb);
++ else
++ hdr_len = sizeof(struct udphdr);
++ return hdr_len + shinfo->gso_size;
++}
++EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
+--- a/net/sched/sch_tbf.c
++++ b/net/sched/sch_tbf.c
+@@ -21,7 +21,6 @@
+ #include <net/netlink.h>
+ #include <net/sch_generic.h>
+ #include <net/pkt_sched.h>
+-#include <net/tcp.h>
+
+
+ /* Simple Token Bucket Filter.
+@@ -148,16 +147,10 @@ static u64 psched_ns_t2l(const struct ps
+ * Return length of individual segments of a gso packet,
+ * including all headers (MAC, IP, TCP/UDP)
+ */
+-static unsigned int skb_gso_seglen(const struct sk_buff *skb)
++static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
+ {
+ unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+- const struct skb_shared_info *shinfo = skb_shinfo(skb);
+-
+- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+- hdr_len += tcp_hdrlen(skb);
+- else
+- hdr_len += sizeof(struct udphdr);
+- return hdr_len + shinfo->gso_size;
++ return hdr_len + skb_gso_transport_seglen(skb);
+ }
+
+ /* GSO packet is too big, segment it so that tbf can transmit
+@@ -202,7 +195,7 @@ static int tbf_enqueue(struct sk_buff *s
+ int ret;
+
+ if (qdisc_pkt_len(skb) > q->max_size) {
+- if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size)
++ if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
+ return tbf_segment(skb, sch);
+ return qdisc_reshape_fail(skb, sch);
+ }
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Emil Goode <emilgoode@gmail.com>
+Date: Thu, 13 Feb 2014 19:30:39 +0100
+Subject: net: asix: add missing flag to struct driver_info
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Emil Goode <emilgoode@gmail.com>
+
+[ Upstream commit d43ff4cd798911736fb39025ec8004284b1b0bc2 ]
+
+The struct driver_info ax88178_info is assigned the function
+asix_rx_fixup_common as its rx_fixup callback. This means that
+FLAG_MULTI_PACKET must be set, as this function clones the
+data and calls usbnet_skb_return. Not setting this flag leads
+to usbnet_skb_return being called a second time from within
+the rx_process function in the usbnet module.
+
+Signed-off-by: Emil Goode <emilgoode@gmail.com>
+Reported-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/asix_devices.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/usb/asix_devices.c
++++ b/drivers/net/usb/asix_devices.c
+@@ -918,7 +918,8 @@ static const struct driver_info ax88178_
+ .status = asix_status,
+ .link_reset = ax88178_link_reset,
+ .reset = ax88178_reset,
+- .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR,
++ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR |
++ FLAG_MULTI_PACKET,
+ .rx_fixup = asix_rx_fixup_common,
+ .tx_fixup = asix_tx_fixup,
+ };
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 21 Feb 2014 20:46:39 +0100
+Subject: net: core: introduce netif_skb_dev_features
+
+From: Florian Westphal <fw@strlen.de>
+
+commit d206940319c41df4299db75ed56142177bb2e5f6 upstream.
+
+This will be used by an upcoming ipv4 forward path change that needs to
+determine the feature mask using skb->dst->dev instead of skb->dev.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h | 7 ++++++-
+ net/core/dev.c | 22 ++++++++++++----------
+ 2 files changed, 18 insertions(+), 11 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2984,7 +2984,12 @@ void netdev_change_features(struct net_d
+ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+ struct net_device *dev);
+
+-netdev_features_t netif_skb_features(struct sk_buff *skb);
++netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
++ const struct net_device *dev);
++static inline netdev_features_t netif_skb_features(struct sk_buff *skb)
++{
++ return netif_skb_dev_features(skb, skb->dev);
++}
+
+ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
+ {
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2404,7 +2404,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
+ * 2. No high memory really exists on this machine.
+ */
+
+-static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
++static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
+ {
+ #ifdef CONFIG_HIGHMEM
+ int i;
+@@ -2484,34 +2484,36 @@ static int dev_gso_segment(struct sk_buf
+ }
+
+ static netdev_features_t harmonize_features(struct sk_buff *skb,
+- netdev_features_t features)
++ const struct net_device *dev,
++ netdev_features_t features)
+ {
+ if (skb->ip_summed != CHECKSUM_NONE &&
+ !can_checksum_protocol(features, skb_network_protocol(skb))) {
+ features &= ~NETIF_F_ALL_CSUM;
+- } else if (illegal_highdma(skb->dev, skb)) {
++ } else if (illegal_highdma(dev, skb)) {
+ features &= ~NETIF_F_SG;
+ }
+
+ return features;
+ }
+
+-netdev_features_t netif_skb_features(struct sk_buff *skb)
++netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
++ const struct net_device *dev)
+ {
+ __be16 protocol = skb->protocol;
+- netdev_features_t features = skb->dev->features;
++ netdev_features_t features = dev->features;
+
+- if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
++ if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
+ features &= ~NETIF_F_GSO_MASK;
+
+ if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else if (!vlan_tx_tag_present(skb)) {
+- return harmonize_features(skb, features);
++ return harmonize_features(skb, dev, features);
+ }
+
+- features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
++ features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
+
+ if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
+@@ -2519,9 +2521,9 @@ netdev_features_t netif_skb_features(str
+ NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX;
+
+- return harmonize_features(skb, features);
++ return harmonize_features(skb, dev, features);
+ }
+-EXPORT_SYMBOL(netif_skb_features);
++EXPORT_SYMBOL(netif_skb_dev_features);
+
+ /*
+ * Returns true if either:
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Maciej Żenczykowski <maze@google.com>
+Date: Fri, 7 Feb 2014 16:23:48 -0800
+Subject: net: fix 'ip rule' iif/oif device rename
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej Żenczykowski <maze@google.com>
+
+[ Upstream commit 946c032e5a53992ea45e062ecb08670ba39b99e3 ]
+
+ip rules with iif/oif references do not update (detach/attach)
+across interface renames.
+
+Signed-off-by: Maciej Żenczykowski <maze@google.com>
+CC: Willem de Bruijn <willemb@google.com>
+CC: Eric Dumazet <edumazet@google.com>
+CC: Chris Davis <chrismd@google.com>
+CC: Carlo Contavalli <ccontavalli@google.com>
+
+Google-Bug-Id: 12936021
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/fib_rules.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/net/core/fib_rules.c
++++ b/net/core/fib_rules.c
+@@ -745,6 +745,13 @@ static int fib_rules_event(struct notifi
+ attach_rules(&ops->rules_list, dev);
+ break;
+
++ case NETDEV_CHANGENAME:
++ list_for_each_entry(ops, &net->rules_ops, list) {
++ detach_rules(&ops->rules_list, dev);
++ attach_rules(&ops->rules_list, dev);
++ }
++ break;
++
+ case NETDEV_UNREGISTER:
+ list_for_each_entry(ops, &net->rules_ops, list)
+ detach_rules(&ops->rules_list, dev);
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 21 Feb 2014 20:46:40 +0100
+Subject: net: ip, ipv6: handle gso skbs in forwarding path
+
+From: Florian Westphal <fw@strlen.de>
+
+commit fe6cc55f3a9a053482a76f5a6b2257cee51b4663 upstream.
+
+Marcelo Ricardo Leitner reported problems when the forwarding link path
+has a lower mtu than the incoming one if the inbound interface supports GRO.
+
+Given:
+Host <mtu1500> R1 <mtu1200> R2
+
+Host sends tcp stream which is routed via R1 and R2. R1 performs GRO.
+
+In this case, the kernel will fail to send ICMP fragmentation needed
+messages (or pkt too big for ipv6), as GSO packets currently bypass dstmtu
+checks in the forward path. Instead, Linux tries to send out packets exceeding
+the mtu.
+
+When locking route MTU on Host (i.e., no ipv4 DF bit set), R1 does
+not fragment the packets when forwarding, and again tries to send out
+packets exceeding the R1-R2 link mtu.
+
+This alters the forwarding dstmtu checks to take the individual gso
+segment lengths into account.
+
+For ipv6, we send out pkt too big error for gso if the individual
+segments are too big.
+
+For ipv4, we either send icmp fragmentation needed, or, if the DF bit
+is not set, perform software segmentation and let the output path
+create fragments when the packet is leaving the machine.
+It is not 100% correct as the error message will contain the headers of
+the GRO skb instead of the original/segmented one, but it seems to
+work fine in my (limited) tests.
+
+Eric Dumazet suggested simply shrinking the mss via ->gso_size to avoid
+software segmentation.
+
+However it turns out that skb_segment() assumes skb nr_frags is related
+to mss size so we would BUG there. I don't want to mess with it considering
+Herbert and Eric disagree on what the correct behavior should be.
+
+Hannes Frederic Sowa notes that if we were to shrink gso_size,
+skb_segment would then also need to deal with the case where
+SKB_MAX_FRAGS is exceeded.
+
+This uses software segmentation in the forward path when we hit ipv4
+non-DF packets and the outgoing link mtu is too small. It's not perfect,
+but given the lack of bug reports wrt. GRO forwarding being broken, this
+is a rare case anyway. Also, it's not as if this could not be improved
+later once the dust settles.
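+
+A minimal userspace sketch of the check this patch introduces (struct
+and field names below are illustrative stand-ins for the skb fields,
+not kernel API): a GSO skb only exceeds the mtu when its individual
+segments do.
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  /* Hypothetical stand-in for the few skb fields the check needs. */
+  struct fwd_pkt {
+      unsigned int len;      /* aggregate skb length */
+      bool is_gso;           /* GSO/GRO packet? */
+      unsigned int seglen;   /* L3+L4 size of each segment */
+      bool local_df;         /* fragmentation allowed regardless of DF */
+  };
+
+  /* Mirrors ip_exceeds_mtu(): an aggregate over the mtu is fine as
+   * long as the individual segments fit, since GSO re-splits it on
+   * output. */
+  static bool exceeds_mtu(const struct fwd_pkt *p, unsigned int mtu)
+  {
+      if (p->len <= mtu || p->local_df)
+          return false;
+      if (p->is_gso && p->seglen <= mtu)
+          return false;
+      return true;
+  }
+
+  int main(void)
+  {
+      /* 40 segments of 1240 bytes GRO'd into one ~48k skb */
+      struct fwd_pkt p = { .len = 49600, .is_gso = true, .seglen = 1240 };
+
+      printf("mtu 1500: %s\n", exceeds_mtu(&p, 1500) ? "too big" : "ok");
+      /* too big for mtu 1200: send ICMP FRAG_NEEDED if DF is set,
+       * otherwise software-segment and fragment on output */
+      printf("mtu 1200: %s\n", exceeds_mtu(&p, 1200) ? "too big" : "ok");
+      return 0;
+  }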
+
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Reported-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h | 17 +++++++++++
+ net/ipv4/ip_forward.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++--
+ net/ipv6/ip6_output.c | 17 ++++++++++-
+ 3 files changed, 101 insertions(+), 4 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2811,5 +2811,22 @@ static inline bool skb_head_is_locked(co
+ {
+ return !skb->head_frag || skb_cloned(skb);
+ }
++
++/**
++ * skb_gso_network_seglen - Return length of individual segments of a gso packet
++ *
++ * @skb: GSO skb
++ *
++ * skb_gso_network_seglen is used to determine the real size of the
++ * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
++ *
++ * The MAC/L2 header is not accounted for.
++ */
++static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
++{
++ unsigned int hdr_len = skb_transport_header(skb) -
++ skb_network_header(skb);
++ return hdr_len + skb_gso_transport_seglen(skb);
++}
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_SKBUFF_H */
+--- a/net/ipv4/ip_forward.c
++++ b/net/ipv4/ip_forward.c
+@@ -39,6 +39,71 @@
+ #include <net/route.h>
+ #include <net/xfrm.h>
+
++static bool ip_may_fragment(const struct sk_buff *skb)
++{
++ return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
++ !skb->local_df;
++}
++
++static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
++{
++ if (skb->len <= mtu || skb->local_df)
++ return false;
++
++ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
++ return false;
++
++ return true;
++}
++
++static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
++{
++ unsigned int mtu;
++
++ if (skb->local_df || !skb_is_gso(skb))
++ return false;
++
++ mtu = dst_mtu(skb_dst(skb));
++
++ /* if seglen > mtu, do software segmentation for IP fragmentation on
++ * output. DF bit cannot be set since ip_forward would have sent
++ * icmp error.
++ */
++ return skb_gso_network_seglen(skb) > mtu;
++}
++
++/* called if GSO skb needs to be fragmented on forward */
++static int ip_forward_finish_gso(struct sk_buff *skb)
++{
++ struct dst_entry *dst = skb_dst(skb);
++ netdev_features_t features;
++ struct sk_buff *segs;
++ int ret = 0;
++
++ features = netif_skb_dev_features(skb, dst->dev);
++ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
++ if (IS_ERR(segs)) {
++ kfree_skb(skb);
++ return -ENOMEM;
++ }
++
++ consume_skb(skb);
++
++ do {
++ struct sk_buff *nskb = segs->next;
++ int err;
++
++ segs->next = NULL;
++ err = dst_output(segs);
++
++ if (err && ret == 0)
++ ret = err;
++ segs = nskb;
++ } while (segs);
++
++ return ret;
++}
++
+ static int ip_forward_finish(struct sk_buff *skb)
+ {
+ struct ip_options *opt = &(IPCB(skb)->opt);
+@@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_b
+ if (unlikely(opt->optlen))
+ ip_forward_options(skb);
+
++ if (ip_gso_exceeds_dst_mtu(skb))
++ return ip_forward_finish_gso(skb);
++
+ return dst_output(skb);
+ }
+
+@@ -88,8 +156,7 @@ int ip_forward(struct sk_buff *skb)
+ if (opt->is_strictroute && rt->rt_uses_gateway)
+ goto sr_failed;
+
+- if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
+- (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
++ if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, dst_mtu(&rt->dst))) {
+ IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(dst_mtu(&rt->dst)));
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -321,6 +321,20 @@ static inline int ip6_forward_finish(str
+ return dst_output(skb);
+ }
+
++static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
++{
++ if (skb->len <= mtu || skb->local_df)
++ return false;
++
++ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
++ return true;
++
++ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
++ return false;
++
++ return true;
++}
++
+ int ip6_forward(struct sk_buff *skb)
+ {
+ struct dst_entry *dst = skb_dst(skb);
+@@ -443,8 +457,7 @@ int ip6_forward(struct sk_buff *skb)
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
+- if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
+- (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
++ if (ip6_pkt_too_big(skb, mtu)) {
+ /* Again, force OUTPUT device used as source address */
+ skb->dev = dst->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: willy tarreau <w@1wt.eu>
+Date: Thu, 16 Jan 2014 08:20:10 +0100
+Subject: net: mvneta: add missing bit descriptions for interrupt masks and causes
+
+From: willy tarreau <w@1wt.eu>
+
+[ Upstream commit 40ba35e74fa56866918d2f3bc0528b5b92725d5e ]
+
+Marvell has not published the chip's datasheet yet, so it's very hard
+to find the relevant bits to manipulate to change the IRQ behaviour.
+Fortunately, these bits are described in the proprietary LSP patch set
+which is publicly available here:
+
+ http://www.plugcomputer.org/downloads/mirabox/
+
+So let's put them back in the driver in order to reduce the burden of
+current and future maintenance.
+
+Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Tested-by: Arnaud Ebalard <arno@natisbad.org>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 44 ++++++++++++++++++++++++++++++++--
+ 1 file changed, 42 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -101,16 +101,56 @@
+ #define MVNETA_CPU_RXQ_ACCESS_ALL_MASK 0x000000ff
+ #define MVNETA_CPU_TXQ_ACCESS_ALL_MASK 0x0000ff00
+ #define MVNETA_RXQ_TIME_COAL_REG(q) (0x2580 + ((q) << 2))
++
++/* Exception Interrupt Port/Queue Cause register */
++
+ #define MVNETA_INTR_NEW_CAUSE 0x25a0
+-#define MVNETA_RX_INTR_MASK(nr_rxqs) (((1 << nr_rxqs) - 1) << 8)
+ #define MVNETA_INTR_NEW_MASK 0x25a4
++
++/* bits 0..7 = TXQ SENT, one bit per queue.
++ * bits 8..15 = RXQ OCCUP, one bit per queue.
++ * bits 16..23 = RXQ FREE, one bit per queue.
++ * bit 29 = OLD_REG_SUM, see old reg ?
++ * bit 30 = TX_ERR_SUM, one bit for 4 ports
++ * bit 31 = MISC_SUM, one bit for 4 ports
++ */
++#define MVNETA_TX_INTR_MASK(nr_txqs) (((1 << nr_txqs) - 1) << 0)
++#define MVNETA_TX_INTR_MASK_ALL (0xff << 0)
++#define MVNETA_RX_INTR_MASK(nr_rxqs) (((1 << nr_rxqs) - 1) << 8)
++#define MVNETA_RX_INTR_MASK_ALL (0xff << 8)
++
+ #define MVNETA_INTR_OLD_CAUSE 0x25a8
+ #define MVNETA_INTR_OLD_MASK 0x25ac
++
++/* Data Path Port/Queue Cause Register */
+ #define MVNETA_INTR_MISC_CAUSE 0x25b0
+ #define MVNETA_INTR_MISC_MASK 0x25b4
++
++#define MVNETA_CAUSE_PHY_STATUS_CHANGE BIT(0)
++#define MVNETA_CAUSE_LINK_CHANGE BIT(1)
++#define MVNETA_CAUSE_PTP BIT(4)
++
++#define MVNETA_CAUSE_INTERNAL_ADDR_ERR BIT(7)
++#define MVNETA_CAUSE_RX_OVERRUN BIT(8)
++#define MVNETA_CAUSE_RX_CRC_ERROR BIT(9)
++#define MVNETA_CAUSE_RX_LARGE_PKT BIT(10)
++#define MVNETA_CAUSE_TX_UNDERUN BIT(11)
++#define MVNETA_CAUSE_PRBS_ERR BIT(12)
++#define MVNETA_CAUSE_PSC_SYNC_CHANGE BIT(13)
++#define MVNETA_CAUSE_SERDES_SYNC_ERR BIT(14)
++
++#define MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT 16
++#define MVNETA_CAUSE_BMU_ALLOC_ERR_ALL_MASK (0xF << MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT)
++#define MVNETA_CAUSE_BMU_ALLOC_ERR_MASK(pool) (1 << (MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT + (pool)))
++
++#define MVNETA_CAUSE_TXQ_ERROR_SHIFT 24
++#define MVNETA_CAUSE_TXQ_ERROR_ALL_MASK (0xFF << MVNETA_CAUSE_TXQ_ERROR_SHIFT)
++#define MVNETA_CAUSE_TXQ_ERROR_MASK(q) (1 << (MVNETA_CAUSE_TXQ_ERROR_SHIFT + (q)))
++
+ #define MVNETA_INTR_ENABLE 0x25b8
+ #define MVNETA_TXQ_INTR_ENABLE_ALL_MASK 0x0000ff00
+-#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000
++#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000 // note: neta says it's 0x000000FF
++
+ #define MVNETA_RXQ_CMD 0x2680
+ #define MVNETA_RXQ_DISABLE_SHIFT 8
+ #define MVNETA_RXQ_ENABLE_MASK 0x000000ff
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: willy tarreau <w@1wt.eu>
+Date: Thu, 16 Jan 2014 08:20:09 +0100
+Subject: net: mvneta: do not schedule in mvneta_tx_timeout
+
+From: willy tarreau <w@1wt.eu>
+
+[ Upstream commit 290213667ab53a95456397763205e4b1e30f46b5 ]
+
+If a queue timeout is reported, we can oops because the handler
+schedules while the caller is atomic, as shown below:
+
+ mvneta d0070000.ethernet eth0: tx timeout
+ BUG: scheduling while atomic: bash/1528/0x00000100
+ Modules linked in: slhttp_ethdiv(C) [last unloaded: slhttp_ethdiv]
+ CPU: 2 PID: 1528 Comm: bash Tainted: G WC 3.13.0-rc4-mvebu-nf #180
+ [<c0011bd9>] (unwind_backtrace+0x1/0x98) from [<c000f1ab>] (show_stack+0xb/0xc)
+ [<c000f1ab>] (show_stack+0xb/0xc) from [<c02ad323>] (dump_stack+0x4f/0x64)
+ [<c02ad323>] (dump_stack+0x4f/0x64) from [<c02abe67>] (__schedule_bug+0x37/0x4c)
+ [<c02abe67>] (__schedule_bug+0x37/0x4c) from [<c02ae261>] (__schedule+0x325/0x3ec)
+ [<c02ae261>] (__schedule+0x325/0x3ec) from [<c02adb97>] (schedule_timeout+0xb7/0x118)
+ [<c02adb97>] (schedule_timeout+0xb7/0x118) from [<c0020a67>] (msleep+0xf/0x14)
+ [<c0020a67>] (msleep+0xf/0x14) from [<c01dcbe5>] (mvneta_stop_dev+0x21/0x194)
+ [<c01dcbe5>] (mvneta_stop_dev+0x21/0x194) from [<c01dcfe9>] (mvneta_tx_timeout+0x19/0x24)
+ [<c01dcfe9>] (mvneta_tx_timeout+0x19/0x24) from [<c024afc7>] (dev_watchdog+0x18b/0x1c4)
+ [<c024afc7>] (dev_watchdog+0x18b/0x1c4) from [<c0020b53>] (call_timer_fn.isra.27+0x17/0x5c)
+ [<c0020b53>] (call_timer_fn.isra.27+0x17/0x5c) from [<c0020cad>] (run_timer_softirq+0x115/0x170)
+ [<c0020cad>] (run_timer_softirq+0x115/0x170) from [<c001ccb9>] (__do_softirq+0xbd/0x1a8)
+ [<c001ccb9>] (__do_softirq+0xbd/0x1a8) from [<c001cfad>] (irq_exit+0x61/0x98)
+ [<c001cfad>] (irq_exit+0x61/0x98) from [<c000d4bf>] (handle_IRQ+0x27/0x60)
+ [<c000d4bf>] (handle_IRQ+0x27/0x60) from [<c000843b>] (armada_370_xp_handle_irq+0x33/0xc8)
+ [<c000843b>] (armada_370_xp_handle_irq+0x33/0xc8) from [<c000fba9>] (__irq_usr+0x49/0x60)
+
+Ben Hutchings proposed a better fix consisting of using scheduled work
+for this, but while it fixed this panic, it caused other random freezes
+and panics, proving that the reset sequence in the driver is unreliable
+and that additional fixes should be investigated.
+
+When sending multiple streams over a link limited to 100 Mbps, Tx timeouts
+happen from time to time, and the driver correctly recovers only when the
+function is disabled.
+
+Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Tested-by: Arnaud Ebalard <arno@natisbad.org>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 11 -----------
+ 1 file changed, 11 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -2244,16 +2244,6 @@ static void mvneta_stop_dev(struct mvnet
+ mvneta_rx_reset(pp);
+ }
+
+-/* tx timeout callback - display a message and stop/start the network device */
+-static void mvneta_tx_timeout(struct net_device *dev)
+-{
+- struct mvneta_port *pp = netdev_priv(dev);
+-
+- netdev_info(dev, "tx timeout\n");
+- mvneta_stop_dev(pp);
+- mvneta_start_dev(pp);
+-}
+-
+ /* Return positive if MTU is valid */
+ static int mvneta_check_mtu_valid(struct net_device *dev, int mtu)
+ {
+@@ -2634,7 +2624,6 @@ static const struct net_device_ops mvnet
+ .ndo_set_rx_mode = mvneta_set_rx_mode,
+ .ndo_set_mac_address = mvneta_set_mac_addr,
+ .ndo_change_mtu = mvneta_change_mtu,
+- .ndo_tx_timeout = mvneta_tx_timeout,
+ .ndo_get_stats64 = mvneta_get_stats64,
+ .ndo_do_ioctl = mvneta_ioctl,
+ };
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: willy tarreau <w@1wt.eu>
+Date: Thu, 16 Jan 2014 08:20:07 +0100
+Subject: net: mvneta: increase the 64-bit rx/tx stats out of the hot path
+
+From: willy tarreau <w@1wt.eu>
+
+[ Upstream commit dc4277dd41a80fd5f29a90412ea04bc3ba54fbf1 ]
+
+It is better to count packets and bytes on the stack in 32-bit variables,
+then accumulate them into the 64-bit stats once at the end. This saves two
+memory writes and two memory barriers per packet. The incoming packet rate
+increased by 4.7% on the OpenBlocks AX3 thanks to this.
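+
+A rough shape of that pattern, with a plain struct standing in for
+mvneta_stats and a bare counter standing in for u64_stats_sync (all
+names here are illustrative):
+
+  #include <stdio.h>
+
+  /* Stand-in for mvneta_stats; seq plays the role of u64_stats_sync. */
+  struct rx_stats {
+      unsigned long seq;
+      unsigned long long packets, bytes;
+  };
+
+  static void rx_poll(struct rx_stats *st, const unsigned int *len, int n)
+  {
+      unsigned int rcvd_pkts = 0, rcvd_bytes = 0;
+
+      for (int i = 0; i < n; i++) {  /* hot path: cheap 32-bit adds */
+          rcvd_pkts++;
+          rcvd_bytes += len[i];
+      }
+      if (rcvd_pkts) {               /* cold path: one 64-bit update */
+          st->seq++;                 /* u64_stats_update_begin() */
+          st->packets += rcvd_pkts;
+          st->bytes += rcvd_bytes;
+          st->seq++;                 /* u64_stats_update_end() */
+      }
+  }
+
+  int main(void)
+  {
+      struct rx_stats st = { 0 };
+      unsigned int len[3] = { 1500, 64, 1500 };
+
+      rx_poll(&st, len, 3);
+      printf("%llu packets, %llu bytes\n", st.packets, st.bytes);
+      return 0;
+  }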
+
+Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Tested-by: Arnaud Ebalard <arno@natisbad.org>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1391,6 +1391,8 @@ static int mvneta_rx(struct mvneta_port
+ {
+ struct net_device *dev = pp->dev;
+ int rx_done, rx_filled;
++ u32 rcvd_pkts = 0;
++ u32 rcvd_bytes = 0;
+
+ /* Get number of received packets */
+ rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
+@@ -1428,10 +1430,8 @@ static int mvneta_rx(struct mvneta_port
+
+ rx_bytes = rx_desc->data_size -
+ (ETH_FCS_LEN + MVNETA_MH_SIZE);
+- u64_stats_update_begin(&pp->rx_stats.syncp);
+- pp->rx_stats.packets++;
+- pp->rx_stats.bytes += rx_bytes;
+- u64_stats_update_end(&pp->rx_stats.syncp);
++ rcvd_pkts++;
++ rcvd_bytes += rx_bytes;
+
+ /* Linux processing */
+ skb_reserve(skb, MVNETA_MH_SIZE);
+@@ -1452,6 +1452,13 @@ static int mvneta_rx(struct mvneta_port
+ }
+ }
+
++ if (rcvd_pkts) {
++ u64_stats_update_begin(&pp->rx_stats.syncp);
++ pp->rx_stats.packets += rcvd_pkts;
++ pp->rx_stats.bytes += rcvd_bytes;
++ u64_stats_update_end(&pp->rx_stats.syncp);
++ }
++
+ /* Update rxq management counters */
+ mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_filled);
+
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: willy tarreau <w@1wt.eu>
+Date: Thu, 16 Jan 2014 08:20:11 +0100
+Subject: net: mvneta: replace Tx timer with a real interrupt
+
+From: willy tarreau <w@1wt.eu>
+
+[ Upstream commit 71f6d1b31fb1f278a345a30a2180515adc7d80ae ]
+
+Right now the mvneta driver doesn't handle Tx IRQ, and relies on two
+mechanisms to flush Tx descriptors: a flush at the end of mvneta_tx()
+and a timer. If a burst of packets is emitted faster than the device
+can send them, then the queue is stopped until next wake-up of the
+timer 10ms later. This causes jerky output traffic with bursts and
+pauses, making it difficult to reach line rate with very few streams.
+
+A test on UDP traffic shows that it's not possible to go beyond 134
+Mbps / 12 kpps of outgoing traffic with 1500-bytes IP packets. Routed
+traffic tends to observe pauses as well if the traffic is bursty,
+making it even burstier after the wake-up.
+
+It seems that this feature was inherited from the original driver but
+nothing there mentions any reason for not using the interrupt instead,
+which the chip supports.
+
+Thus, this patch enables Tx interrupts and removes the timer. It does
+the two at once because it's not really possible to make the two
+mechanisms coexist, so a split patch doesn't make sense.
+
+First tests performed on a Mirabox (Armada 370) show that less CPU
+seems to be used when sending traffic. One reason might be that we now
+call the mvneta_tx_done_gbe() with a mask indicating which queues have
+been done instead of looping over all of them.
+
+The same UDP test above now happily reaches 987 Mbps / 87.7 kpps.
+Single-stream TCP traffic can now more easily reach line rate. HTTP
+transfers of 1 MB objects over a single connection went from 730 to
+840 Mbps. It is even possible to go significantly higher (>900 Mbps)
+by tweaking tcp_tso_win_divisor.
+
+Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Cc: Arnaud Ebalard <arno@natisbad.org>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Tested-by: Arnaud Ebalard <arno@natisbad.org>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 71 +++++-----------------------------
+ 1 file changed, 12 insertions(+), 59 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -216,9 +216,6 @@
+ #define MVNETA_RX_COAL_PKTS 32
+ #define MVNETA_RX_COAL_USEC 100
+
+-/* Timer */
+-#define MVNETA_TX_DONE_TIMER_PERIOD 10
+-
+ /* Napi polling weight */
+ #define MVNETA_RX_POLL_WEIGHT 64
+
+@@ -274,16 +271,11 @@ struct mvneta_port {
+ void __iomem *base;
+ struct mvneta_rx_queue *rxqs;
+ struct mvneta_tx_queue *txqs;
+- struct timer_list tx_done_timer;
+ struct net_device *dev;
+
+ u32 cause_rx_tx;
+ struct napi_struct napi;
+
+- /* Flags */
+- unsigned long flags;
+-#define MVNETA_F_TX_DONE_TIMER_BIT 0
+-
+ /* Napi weight */
+ int weight;
+
+@@ -1149,17 +1141,6 @@ static void mvneta_tx_done_pkts_coal_set
+ txq->done_pkts_coal = value;
+ }
+
+-/* Trigger tx done timer in MVNETA_TX_DONE_TIMER_PERIOD msecs */
+-static void mvneta_add_tx_done_timer(struct mvneta_port *pp)
+-{
+- if (test_and_set_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags) == 0) {
+- pp->tx_done_timer.expires = jiffies +
+- msecs_to_jiffies(MVNETA_TX_DONE_TIMER_PERIOD);
+- add_timer(&pp->tx_done_timer);
+- }
+-}
+-
+-
+ /* Handle rx descriptor fill by setting buf_cookie and buf_phys_addr */
+ static void mvneta_rx_desc_fill(struct mvneta_rx_desc *rx_desc,
+ u32 phys_addr, u32 cookie)
+@@ -1651,15 +1632,6 @@ out:
+ dev_kfree_skb_any(skb);
+ }
+
+- if (txq->count >= MVNETA_TXDONE_COAL_PKTS)
+- mvneta_txq_done(pp, txq);
+-
+- /* If after calling mvneta_txq_done, count equals
+- * frags, we need to set the timer
+- */
+- if (txq->count == frags && frags > 0)
+- mvneta_add_tx_done_timer(pp);
+-
+ return NETDEV_TX_OK;
+ }
+
+@@ -1935,14 +1907,22 @@ static int mvneta_poll(struct napi_struc
+
+ /* Read cause register */
+ cause_rx_tx = mvreg_read(pp, MVNETA_INTR_NEW_CAUSE) &
+- MVNETA_RX_INTR_MASK(rxq_number);
++ (MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number));
++
++ /* Release Tx descriptors */
++ if (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL) {
++ int tx_todo = 0;
++
++ mvneta_tx_done_gbe(pp, (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL), &tx_todo);
++ cause_rx_tx &= ~MVNETA_TX_INTR_MASK_ALL;
++ }
+
+ /* For the case where the last mvneta_poll did not process all
+ * RX packets
+ */
+ cause_rx_tx |= pp->cause_rx_tx;
+ if (rxq_number > 1) {
+- while ((cause_rx_tx != 0) && (budget > 0)) {
++ while ((cause_rx_tx & MVNETA_RX_INTR_MASK_ALL) && (budget > 0)) {
+ int count;
+ struct mvneta_rx_queue *rxq;
+ /* get rx queue number from cause_rx_tx */
+@@ -1974,7 +1954,7 @@ static int mvneta_poll(struct napi_struc
+ napi_complete(napi);
+ local_irq_save(flags);
+ mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+- MVNETA_RX_INTR_MASK(rxq_number));
++ MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number));
+ local_irq_restore(flags);
+ }
+
+@@ -1982,26 +1962,6 @@ static int mvneta_poll(struct napi_struc
+ return rx_done;
+ }
+
+-/* tx done timer callback */
+-static void mvneta_tx_done_timer_callback(unsigned long data)
+-{
+- struct net_device *dev = (struct net_device *)data;
+- struct mvneta_port *pp = netdev_priv(dev);
+- int tx_done = 0, tx_todo = 0;
+-
+- if (!netif_running(dev))
+- return ;
+-
+- clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
+-
+- tx_done = mvneta_tx_done_gbe(pp,
+- (((1 << txq_number) - 1) &
+- MVNETA_CAUSE_TXQ_SENT_DESC_ALL_MASK),
+- &tx_todo);
+- if (tx_todo > 0)
+- mvneta_add_tx_done_timer(pp);
+-}
+-
+ /* Handle rxq fill: allocates rxq skbs; called when initializing a port */
+ static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+ int num)
+@@ -2251,7 +2211,7 @@ static void mvneta_start_dev(struct mvne
+
+ /* Unmask interrupts */
+ mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+- MVNETA_RX_INTR_MASK(rxq_number));
++ MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number));
+
+ phy_start(pp->phy_dev);
+ netif_tx_start_all_queues(pp->dev);
+@@ -2527,8 +2487,6 @@ static int mvneta_stop(struct net_device
+ free_irq(dev->irq, pp);
+ mvneta_cleanup_rxqs(pp);
+ mvneta_cleanup_txqs(pp);
+- del_timer(&pp->tx_done_timer);
+- clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
+
+ return 0;
+ }
+@@ -2887,11 +2845,6 @@ static int mvneta_probe(struct platform_
+ }
+ }
+
+- pp->tx_done_timer.data = (unsigned long)dev;
+- pp->tx_done_timer.function = mvneta_tx_done_timer_callback;
+- init_timer(&pp->tx_done_timer);
+- clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
+-
+ pp->tx_ring_size = MVNETA_MAX_TXD;
+ pp->rx_ring_size = MVNETA_MAX_RXD;
+
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: willy tarreau <w@1wt.eu>
+Date: Thu, 16 Jan 2014 08:20:08 +0100
+Subject: net: mvneta: use per_cpu stats to fix an SMP lock up
+
+From: willy tarreau <w@1wt.eu>
+
+[ Upstream commit 74c41b048db1073a04827d7f39e95ac1935524cc ]
+
+Stats writers are mvneta_rx() and mvneta_tx(). They don't lock anything
+when they update the stats, and as a result, it randomly happens that
+the stats freeze on SMP if two updates happen during stats retrieval.
+This is very easily reproducible by starting two HTTP servers, binding
+each of them to a different CPU, and then consulting /proc/net/dev in
+loops during transfers; the interface should immediately lock up.
+also randomly happens upon link state changes during transfers, because
+the stats are collected in this situation, but it takes more attempts to
+reproduce it.
+
+The comments in netdevice.h suggest using per_cpu stats instead to get
+rid of this issue.
+
+This patch implements this. It merges both rx_stats and tx_stats into
+a single "stats" member with a single syncp. Both mvneta_rx() and
+mvneta_tx() now update only a single CPU's counters.
+
+In turn, mvneta_get_stats64() does the summing by iterating over all CPUs
+to get their respective stats.
+
+With this change, stats are still correct and no lockups are encountered
+anymore.
+
+Note that this bug has been present since the first import of the mvneta
+driver. It might make sense to backport it to some stable trees. If
+so, it depends on "d33dc73 net: mvneta: increase the 64-bit rx/tx stats
+out of the hot path".
+
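+A toy single-threaded sketch of the read side (NR_CPUS and the struct
+below are illustrative; in the kernel each CPU's read sits inside a
+u64_stats_fetch_begin/retry pair):
+
+  #include <stdio.h>
+
+  #define NR_CPUS 4
+
+  /* Stand-in for mvneta_pcpu_stats: each CPU owns one slot, so the
+   * writers never contend and never need a shared lock. */
+  struct pcpu_stats {
+      unsigned long long rx_packets, rx_bytes;
+      unsigned long long tx_packets, tx_bytes;
+  };
+
+  static struct pcpu_stats stats[NR_CPUS];
+
+  /* Reader side, as in mvneta_get_stats64(): sum every CPU's slot. */
+  static void get_stats64(struct pcpu_stats *out)
+  {
+      *out = (struct pcpu_stats){ 0 };
+      for (int cpu = 0; cpu < NR_CPUS; cpu++) {
+          out->rx_packets += stats[cpu].rx_packets;
+          out->rx_bytes   += stats[cpu].rx_bytes;
+          out->tx_packets += stats[cpu].tx_packets;
+          out->tx_bytes   += stats[cpu].tx_bytes;
+      }
+  }
+
+  int main(void)
+  {
+      struct pcpu_stats total;
+
+      stats[0].rx_packets = 10; stats[0].rx_bytes = 15000;
+      stats[2].rx_packets = 5;  stats[2].rx_bytes = 7500;
+      get_stats64(&total);
+      printf("rx: %llu pkts, %llu bytes\n",
+             total.rx_packets, total.rx_bytes);
+      return 0;
+  }
+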
+Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Tested-by: Arnaud Ebalard <arno@natisbad.org>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 88 ++++++++++++++++++++++------------
+ 1 file changed, 57 insertions(+), 31 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -221,10 +221,12 @@
+
+ #define MVNETA_RX_BUF_SIZE(pkt_size) ((pkt_size) + NET_SKB_PAD)
+
+-struct mvneta_stats {
++struct mvneta_pcpu_stats {
+ struct u64_stats_sync syncp;
+- u64 packets;
+- u64 bytes;
++ u64 rx_packets;
++ u64 rx_bytes;
++ u64 tx_packets;
++ u64 tx_bytes;
+ };
+
+ struct mvneta_port {
+@@ -250,8 +252,7 @@ struct mvneta_port {
+ u8 mcast_count[256];
+ u16 tx_ring_size;
+ u16 rx_ring_size;
+- struct mvneta_stats tx_stats;
+- struct mvneta_stats rx_stats;
++ struct mvneta_pcpu_stats *stats;
+
+ struct mii_bus *mii_bus;
+ struct phy_device *phy_dev;
+@@ -461,21 +462,29 @@ struct rtnl_link_stats64 *mvneta_get_sta
+ {
+ struct mvneta_port *pp = netdev_priv(dev);
+ unsigned int start;
++ int cpu;
+
+- memset(stats, 0, sizeof(struct rtnl_link_stats64));
+-
+- do {
+- start = u64_stats_fetch_begin_bh(&pp->rx_stats.syncp);
+- stats->rx_packets = pp->rx_stats.packets;
+- stats->rx_bytes = pp->rx_stats.bytes;
+- } while (u64_stats_fetch_retry_bh(&pp->rx_stats.syncp, start));
+-
+-
+- do {
+- start = u64_stats_fetch_begin_bh(&pp->tx_stats.syncp);
+- stats->tx_packets = pp->tx_stats.packets;
+- stats->tx_bytes = pp->tx_stats.bytes;
+- } while (u64_stats_fetch_retry_bh(&pp->tx_stats.syncp, start));
++ for_each_possible_cpu(cpu) {
++ struct mvneta_pcpu_stats *cpu_stats;
++ u64 rx_packets;
++ u64 rx_bytes;
++ u64 tx_packets;
++ u64 tx_bytes;
++
++ cpu_stats = per_cpu_ptr(pp->stats, cpu);
++ do {
++ start = u64_stats_fetch_begin_bh(&cpu_stats->syncp);
++ rx_packets = cpu_stats->rx_packets;
++ rx_bytes = cpu_stats->rx_bytes;
++ tx_packets = cpu_stats->tx_packets;
++ tx_bytes = cpu_stats->tx_bytes;
++ } while (u64_stats_fetch_retry_bh(&cpu_stats->syncp, start));
++
++ stats->rx_packets += rx_packets;
++ stats->rx_bytes += rx_bytes;
++ stats->tx_packets += tx_packets;
++ stats->tx_bytes += tx_bytes;
++ }
+
+ stats->rx_errors = dev->stats.rx_errors;
+ stats->rx_dropped = dev->stats.rx_dropped;
+@@ -1453,10 +1462,12 @@ static int mvneta_rx(struct mvneta_port
+ }
+
+ if (rcvd_pkts) {
+- u64_stats_update_begin(&pp->rx_stats.syncp);
+- pp->rx_stats.packets += rcvd_pkts;
+- pp->rx_stats.bytes += rcvd_bytes;
+- u64_stats_update_end(&pp->rx_stats.syncp);
++ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
++
++ u64_stats_update_begin(&stats->syncp);
++ stats->rx_packets += rcvd_pkts;
++ stats->rx_bytes += rcvd_bytes;
++ u64_stats_update_end(&stats->syncp);
+ }
+
+ /* Update rxq management counters */
+@@ -1589,11 +1600,12 @@ static int mvneta_tx(struct sk_buff *skb
+
+ out:
+ if (frags > 0) {
+- u64_stats_update_begin(&pp->tx_stats.syncp);
+- pp->tx_stats.packets++;
+- pp->tx_stats.bytes += skb->len;
+- u64_stats_update_end(&pp->tx_stats.syncp);
++ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
++ u64_stats_update_begin(&stats->syncp);
++ stats->tx_packets++;
++ stats->tx_bytes += skb->len;
++ u64_stats_update_end(&stats->syncp);
+ } else {
+ dev->stats.tx_dropped++;
+ dev_kfree_skb_any(skb);
+@@ -2758,6 +2770,7 @@ static int mvneta_probe(struct platform_
+ const char *mac_from;
+ int phy_mode;
+ int err;
++ int cpu;
+
+ /* Our multiqueue support is not complete, so for now, only
+ * allow the usage of the first RX queue
+@@ -2799,9 +2812,6 @@ static int mvneta_probe(struct platform_
+
+ pp = netdev_priv(dev);
+
+- u64_stats_init(&pp->tx_stats.syncp);
+- u64_stats_init(&pp->rx_stats.syncp);
+-
+ pp->weight = MVNETA_RX_POLL_WEIGHT;
+ pp->phy_node = phy_node;
+ pp->phy_interface = phy_mode;
+@@ -2820,6 +2830,19 @@ static int mvneta_probe(struct platform_
+ goto err_clk;
+ }
+
++ /* Alloc per-cpu stats */
++ pp->stats = alloc_percpu(struct mvneta_pcpu_stats);
++ if (!pp->stats) {
++ err = -ENOMEM;
++ goto err_unmap;
++ }
++
++ for_each_possible_cpu(cpu) {
++ struct mvneta_pcpu_stats *stats;
++ stats = per_cpu_ptr(pp->stats, cpu);
++ u64_stats_init(&stats->syncp);
++ }
++
+ dt_mac_addr = of_get_mac_address(dn);
+ if (dt_mac_addr) {
+ mac_from = "device tree";
+@@ -2849,7 +2872,7 @@ static int mvneta_probe(struct platform_
+ err = mvneta_init(pp, phy_addr);
+ if (err < 0) {
+ dev_err(&pdev->dev, "can't init eth hal\n");
+- goto err_unmap;
++ goto err_free_stats;
+ }
+ mvneta_port_power_up(pp, phy_mode);
+
+@@ -2879,6 +2902,8 @@ static int mvneta_probe(struct platform_
+
+ err_deinit:
+ mvneta_deinit(pp);
++err_free_stats:
++ free_percpu(pp->stats);
+ err_unmap:
+ iounmap(pp->base);
+ err_clk:
+@@ -2899,6 +2924,7 @@ static int mvneta_remove(struct platform
+ unregister_netdev(dev);
+ mvneta_deinit(pp);
+ clk_disable_unprepare(pp->clk);
++ free_percpu(pp->stats);
+ iounmap(pp->base);
+ irq_dispose_mapping(dev->irq);
+ free_netdev(dev);
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Bjørn Mork <bjorn@mork.no>
+Date: Tue, 4 Feb 2014 13:04:33 +0100
+Subject: net: qmi_wwan: add Netgear Aircard 340U
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Bjørn Mork <bjorn@mork.no>
+
+[ Upstream commit fbd3a77d813f211060f86cc7a2f8416caf0e03b1 ]
+
+This device was mentioned in an OpenWRT forum. It seems to have a
+"standard" Sierra Wireless ifnumber to function layout:
+ 0: qcdm
+ 2: nmea
+ 3: modem
+ 8: qmi
+ 9: storage
+
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -723,6 +723,7 @@ static const struct usb_device_id produc
+ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */
+ {QMI_FIXED_INTF(0x1199, 0x68a2, 19)}, /* Sierra Wireless MC7710 in QMI mode */
+ {QMI_FIXED_INTF(0x1199, 0x901c, 8)}, /* Sierra Wireless EM7700 */
++ {QMI_FIXED_INTF(0x1199, 0x9051, 8)}, /* Netgear AirCard 340U */
+ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
+ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
+ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Mon, 17 Feb 2014 12:11:11 +0100
+Subject: net: sctp: fix sctp_connectx abi for ia32 emulation/compat mode
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit ffd5939381c609056b33b7585fb05a77b4c695f3 ]
+
+SCTP's sctp_connectx() abi breaks for 64bit kernels compiled with 32bit
+emulation (e.g. ia32 emulation or x86_x32). Due to internal usage of
+'struct sctp_getaddrs_old', which includes a struct sockaddr pointer,
+the sizeof(param) check will always fail in the kernel, as the structure
+in 64bit kernel space is 4 bytes larger than for user binaries compiled
+in 32bit mode. Thus, applications making use of sctp_connectx() won't
+be able to run under such circumstances.
+
+Introduce a compat interface in the kernel to deal with such
+situations by using a 'struct compat_sctp_getaddrs_old' structure
+where user data is copied into it and is then transformed
+into a 'struct sctp_getaddrs_old' structure with the help of
+compat_ptr(). That fixes sctp_connectx() abi without any changes
+needed in user space, and lets the SCTP test suite pass when compiled
+in 32bit and run on 64bit kernels.
+
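+The size mismatch is easy to demonstrate in plain C; a sketch, with the
+struct layouts reconstructed here for illustration:
+
+  #include <stdint.h>
+  #include <stdio.h>
+  #include <sys/socket.h>
+
+  typedef int32_t sctp_assoc_t;
+
+  /* What a 64-bit kernel sees for its own definition ... */
+  struct sctp_getaddrs_old {
+      sctp_assoc_t assoc_id;
+      int32_t addr_num;
+      struct sockaddr *addrs;   /* 8 bytes on 64-bit */
+  };
+
+  /* ... versus what a 32-bit binary actually passed in. */
+  struct compat_sctp_getaddrs_old {
+      sctp_assoc_t assoc_id;
+      int32_t addr_num;
+      uint32_t addrs;           /* compat_uptr_t: always 4 bytes */
+  };
+
+  int main(void)
+  {
+      /* On x86_64 this prints 16 vs 12, so the old len < sizeof(param)
+       * check rejected every request from 32-bit userspace. */
+      printf("native: %zu, compat: %zu\n",
+             sizeof(struct sctp_getaddrs_old),
+             sizeof(struct compat_sctp_getaddrs_old));
+      return 0;
+  }
+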
+Fixes: f9c67811ebc0 ("sctp: Fix regression introduced by new sctp_connectx api")
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Acked-by: Vlad Yasevich <vyasevich@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c | 41 ++++++++++++++++++++++++++++++++---------
+ 1 file changed, 32 insertions(+), 9 deletions(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -65,6 +65,7 @@
+ #include <linux/crypto.h>
+ #include <linux/slab.h>
+ #include <linux/file.h>
++#include <linux/compat.h>
+
+ #include <net/ip.h>
+ #include <net/icmp.h>
+@@ -1369,11 +1370,19 @@ static int sctp_setsockopt_connectx(stru
+ /*
+ * New (hopefully final) interface for the API.
+ * We use the sctp_getaddrs_old structure so that use-space library
+- * can avoid any unnecessary allocations. The only defferent part
++ * can avoid any unnecessary allocations. The only different part
+ * is that we store the actual length of the address buffer into the
+- * addrs_num structure member. That way we can re-use the existing
++ * addrs_num structure member. That way we can re-use the existing
+ * code.
+ */
++#ifdef CONFIG_COMPAT
++struct compat_sctp_getaddrs_old {
++ sctp_assoc_t assoc_id;
++ s32 addr_num;
++ compat_uptr_t addrs; /* struct sockaddr * */
++};
++#endif
++
+ static int sctp_getsockopt_connectx3(struct sock* sk, int len,
+ char __user *optval,
+ int __user *optlen)
+@@ -1382,16 +1391,30 @@ static int sctp_getsockopt_connectx3(str
+ sctp_assoc_t assoc_id = 0;
+ int err = 0;
+
+- if (len < sizeof(param))
+- return -EINVAL;
++#ifdef CONFIG_COMPAT
++ if (is_compat_task()) {
++ struct compat_sctp_getaddrs_old param32;
+
+- if (copy_from_user(&param, optval, sizeof(param)))
+- return -EFAULT;
++ if (len < sizeof(param32))
++ return -EINVAL;
++ if (copy_from_user(&param32, optval, sizeof(param32)))
++ return -EFAULT;
+
+- err = __sctp_setsockopt_connectx(sk,
+- (struct sockaddr __user *)param.addrs,
+- param.addr_num, &assoc_id);
++ param.assoc_id = param32.assoc_id;
++ param.addr_num = param32.addr_num;
++ param.addrs = compat_ptr(param32.addrs);
++ } else
++#endif
++ {
++ if (len < sizeof(param))
++ return -EINVAL;
++ if (copy_from_user(&param, optval, sizeof(param)))
++ return -EFAULT;
++ }
+
++ err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *)
++ param.addrs, param.addr_num,
++ &assoc_id);
+ if (err == 0 || err == -EINPROGRESS) {
+ if (copy_to_user(optval, &assoc_id, sizeof(assoc_id)))
+ return -EFAULT;
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 6 Feb 2014 10:42:42 -0800
+Subject: net: use __GFP_NORETRY for high order allocations
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ed98df3361f059db42786c830ea96e2d18b8d4db ]
+
+sock_alloc_send_pskb() and sk_page_frag_refill() have a loop trying
+high order allocations to prepare skbs with a low number of fragments,
+as this increases performance.
+
+The problem is that under memory pressure/fragmentation, this can
+trigger the OOM killer while the intent was only to try the high order
+allocations, then fall back to order-0 allocations.
+We had various reports from unexpected regressions.
+
+According to David, setting __GFP_NORETRY should be fine,
+as asynchronous compaction is still enabled, and this
+will prevent the OOM killer from kicking in, as in:
+
+CFSClientEventm invoked oom-killer: gfp_mask=0x42d0, order=3, oom_adj=0,
+oom_score_adj=0, oom_score_badness=2 (enabled),memcg_scoring=disabled
+CFSClientEventm
+
+Call Trace:
+ [<ffffffff8043766c>] dump_header+0xe1/0x23e
+ [<ffffffff80437a02>] oom_kill_process+0x6a/0x323
+ [<ffffffff80438443>] out_of_memory+0x4b3/0x50d
+ [<ffffffff8043a4a6>] __alloc_pages_may_oom+0xa2/0xc7
+ [<ffffffff80236f42>] __alloc_pages_nodemask+0x1002/0x17f0
+ [<ffffffff8024bd23>] alloc_pages_current+0x103/0x2b0
+ [<ffffffff8028567f>] sk_page_frag_refill+0x8f/0x160
+ [<ffffffff80295fa0>] tcp_sendmsg+0x560/0xee0
+ [<ffffffff802a5037>] inet_sendmsg+0x67/0x100
+ [<ffffffff80283c9c>] __sock_sendmsg_nosec+0x6c/0x90
+ [<ffffffff80283e85>] sock_sendmsg+0xc5/0xf0
+ [<ffffffff802847b6>] __sys_sendmsg+0x136/0x430
+ [<ffffffff80284ec8>] sys_sendmsg+0x88/0x110
+ [<ffffffff80711472>] system_call_fastpath+0x16/0x1b
+Out of Memory: Kill process 2856 (bash) score 9999 or sacrifice child
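+
+A toy sketch of the intended best-effort pattern (try_alloc is a
+hypothetical stand-in for alloc_pages; the order > 1 failure merely
+simulates fragmented memory):
+
+  #include <stdio.h>
+  #include <stdlib.h>
+
+  /* With __GFP_NORETRY the real allocator fails fast instead of
+   * invoking the OOM killer; here high orders simply return NULL. */
+  static void *try_alloc(unsigned int order)
+  {
+      if (order > 1)
+          return NULL;             /* simulated fragmentation */
+      return malloc(4096u << order);
+  }
+
+  int main(void)
+  {
+      unsigned int order = 3;
+      void *page = NULL;
+
+      /* The sk_page_frag_refill() shape: high orders are purely
+       * opportunistic, order-0 is the guaranteed fallback. */
+      while (order) {
+          page = try_alloc(order);
+          if (page)
+              break;
+          order--;
+      }
+      if (!page)
+          page = try_alloc(0);
+
+      if (page)
+          printf("got order-%u allocation\n", order);
+      free(page);
+      return 0;
+  }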
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1800,7 +1800,9 @@ struct sk_buff *sock_alloc_send_pskb(str
+ while (order) {
+ if (npages >= 1 << order) {
+ page = alloc_pages(sk->sk_allocation |
+- __GFP_COMP | __GFP_NOWARN,
++ __GFP_COMP |
++ __GFP_NOWARN |
++ __GFP_NORETRY,
+ order);
+ if (page)
+ goto fill_page;
+@@ -1872,7 +1874,7 @@ bool skb_page_frag_refill(unsigned int s
+ gfp_t gfp = prio;
+
+ if (order)
+- gfp |= __GFP_COMP | __GFP_NOWARN;
++ gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
+ pfrag->page = alloc_pages(gfp, order);
+ if (likely(pfrag->page)) {
+ pfrag->offset = 0;
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Thu, 6 Feb 2014 18:34:12 +0100
+Subject: netpoll: fix netconsole IPv6 setup
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 00fe11b3c67dc670fe6391d22f1fe64e7c99a8ec ]
+
+Currently, to make netconsole start over IPv6, the source address
+needs to be specified. Without a source address, netpoll_parse_options
+assumes we're setting up over IPv4 and the destination IPv6 address is
+rejected.
+
+Check if the IP version has been forced by a source address before
+checking for a version mismatch when parsing the destination address.
+
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
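+A userspace sketch of that ordering (parse_ip_version and parse are
+illustrative stand-ins for netpoll_parse_ip_addr and the option
+parser):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+  #include <string.h>
+
+  /* Returns 1 for IPv6, 0 for IPv4, -1 on error. */
+  static int parse_ip_version(const char *s)
+  {
+      if (strchr(s, ':'))
+          return 1;
+      return strchr(s, '.') ? 0 : -1;
+  }
+
+  static bool parse(const char *src, const char *dst)
+  {
+      bool ipversion_set = false;
+      bool ipv6 = false;
+      int v;
+
+      if (src) {                    /* optional source address */
+          v = parse_ip_version(src);
+          if (v < 0)
+              return false;
+          ipv6 = v;
+          ipversion_set = true;     /* version forced by the source */
+      }
+
+      v = parse_ip_version(dst);
+      if (v < 0)
+          return false;
+      /* only a *forced* version may conflict with the destination */
+      if (ipversion_set && ipv6 != (bool)v)
+          return false;
+      return true;
+  }
+
+  int main(void)
+  {
+      /* without the fix, the first case was rejected because the
+       * unset version defaulted to IPv4 */
+      printf("%d\n", parse(NULL, "fe80::1"));        /* 1: accepted */
+      printf("%d\n", parse("10.0.0.1", "fe80::1"));  /* 0: mismatch */
+      return 0;
+  }
+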
+Acked-by: Cong Wang <cwang@twopensource.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/netpoll.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/netpoll.c
++++ b/net/core/netpoll.c
+@@ -948,6 +948,7 @@ int netpoll_parse_options(struct netpoll
+ {
+ char *cur=opt, *delim;
+ int ipv6;
++ bool ipversion_set = false;
+
+ if (*cur != '@') {
+ if ((delim = strchr(cur, '@')) == NULL)
+@@ -960,6 +961,7 @@ int netpoll_parse_options(struct netpoll
+ cur++;
+
+ if (*cur != '/') {
++ ipversion_set = true;
+ if ((delim = strchr(cur, '/')) == NULL)
+ goto parse_failed;
+ *delim = 0;
+@@ -1002,7 +1004,7 @@ int netpoll_parse_options(struct netpoll
+ ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
+ if (ipv6 < 0)
+ goto parse_failed;
+- else if (np->ipv6 != (bool)ipv6)
++ else if (ipversion_set && np->ipv6 != (bool)ipv6)
+ goto parse_failed;
+ else
+ np->ipv6 = (bool)ipv6;
rtlwifi-rtl8192ce-fix-too-long-disable-of-irqs.patch
nfs-do-not-set-nfs_ino_invalid_label-unless-server-supports-labeled-nfs.patch
nfs-fix-error-return-in-nfs4_select_rw_stateid.patch
+6lowpan-fix-lockdep-splats.patch
+9p-trans_virtio.c-fix-broken-zero-copy-on-vmalloc-buffers.patch
+bridge-fix-netconsole-setup-over-bridge.patch
+can-add-destructor-for-self-generated-skbs.patch
+ipv4-fix-runtime-warning-in-rtmsg_ifa.patch
+net-fix-ip-rule-iif-oif-device-rename.patch
+netpoll-fix-netconsole-ipv6-setup.patch
+net-qmi_wwan-add-netgear-aircard-340u.patch
+tcp-tsq-fix-nonagle-handling.patch
+tg3-fix-deadlock-in-tg3_change_mtu.patch
+vhost-fix-ref-cnt-checking-deadlock.patch
+hyperv-fix-the-carrier-status-setting.patch
+net-asix-add-missing-flag-to-struct-driver_info.patch
+gre-add-link-local-route-when-local-addr-is-any.patch
+usbnet-remove-generic-hard_header_len-check.patch
+bonding-802.3ad-make-aggregator_identifier-bond-private.patch
+ipv4-fix-counter-in_slow_tot.patch
+net-sctp-fix-sctp_connectx-abi-for-ia32-emulation-compat-mode.patch
+net-add-and-use-skb_gso_transport_seglen.patch
+net-core-introduce-netif_skb_dev_features.patch
+net-ip-ipv6-handle-gso-skbs-in-forwarding-path.patch
+net-mvneta-increase-the-64-bit-rx-tx-stats-out-of-the-hot-path.patch
+net-mvneta-use-per_cpu-stats-to-fix-an-smp-lock-up.patch
+net-mvneta-do-not-schedule-in-mvneta_tx_timeout.patch
+net-mvneta-add-missing-bit-descriptions-for-interrupt-masks-and-causes.patch
+net-mvneta-replace-tx-timer-with-a-real-interrupt.patch
+net-use-__gfp_noretry-for-high-order-allocations.patch
+batman-adv-fix-soft-interface-mtu-computation.patch
+batman-adv-fix-tt-tvlv-parsing-on-ogm-reception.patch
+batman-adv-release-vlan-object-after-checking-the-crc.patch
+batman-adv-properly-check-pskb_may_pull-return-value.patch
+batman-adv-avoid-potential-race-condition-when-adding-a-new-neighbour.patch
+batman-adv-fix-potential-orig_node-reference-leak.patch
+batman-adv-fix-tt-crc-computation-by-ensuring-byte-order.patch
+batman-adv-free-skb-on-tvlv-parsing-success.patch
+batman-adv-avoid-double-free-when-orig_node-initialization-fails.patch
+batman-adv-fix-potential-kernel-paging-error-for-unicast-transmissions.patch
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: John Ogness <john.ogness@linutronix.de>
+Date: Sun, 9 Feb 2014 18:40:11 -0800
+Subject: tcp: tsq: fix nonagle handling
+
+From: John Ogness <john.ogness@linutronix.de>
+
+[ Upstream commit bf06200e732de613a1277984bf34d1a21c2de03d ]
+
+Commit 46d3ceabd8d9 ("tcp: TCP Small Queues") introduced a possible
+regression for applications using TCP_NODELAY.
+
+If a TCP session is throttled because of tsq, we should consult
+tp->nonagle when TX completion is done and allow ourselves to send an
+additional segment, especially if this segment is not a full MSS.
+Otherwise this segment is only sent after an RTO.
+
+[edumazet]: Cooked the changelog; added another fix about testing
+sk_wmem_alloc twice, because TX completion can happen right before
+setting the TSQ_THROTTLED bit.
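+
+The shape of that set-then-recheck fix, sketched with C11 atomics
+(wmem_alloc, flags and should_stop are illustrative names, not the
+kernel API):
+
+  #include <stdatomic.h>
+  #include <stdbool.h>
+
+  static atomic_uint wmem_alloc;   /* stands in for sk->sk_wmem_alloc */
+  static atomic_uint flags;        /* stands in for tp->tsq_flags */
+  #define TSQ_THROTTLED (1u << 0)
+
+  static bool should_stop(unsigned int limit)
+  {
+      if (atomic_load(&wmem_alloc) <= limit)
+          return false;
+
+      atomic_fetch_or(&flags, TSQ_THROTTLED);
+      /* full barrier between setting the bit and re-reading, like
+       * the smp_mb__after_clear_bit() the patch (ab)uses */
+      atomic_thread_fence(memory_order_seq_cst);
+
+      /* TX completion may have run before the bit was visible;
+       * only stop if the queue is still over the limit */
+      return atomic_load(&wmem_alloc) > limit;
+  }
+
+  int main(void)
+  {
+      atomic_store(&wmem_alloc, 200000);
+      return should_stop(131072) ? 0 : 1;
+  }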
+
+This problem is particularly visible with recent auto corking,
+but might also be triggered with low tcp_limit_output_bytes
+values or NIC drivers delaying TX completion by hundreds of usec,
+and very low rtt.
+
+Thomas Glanzmann, for example, reported an iSCSI regression caused
+by tcp auto corking making this bug quite visible.
+
+Fixes: 46d3ceabd8d9 ("tcp: TCP Small Queues")
+Signed-off-by: John Ogness <john.ogness@linutronix.de>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Thomas Glanzmann <thomas@glanzmann.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -696,7 +696,8 @@ static void tcp_tsq_handler(struct sock
+ if ((1 << sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
+ TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
+- tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
++ tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
++ 0, GFP_ATOMIC);
+ }
+ /*
+ * One tasklest per cpu tries to send more skbs.
+@@ -1884,7 +1885,15 @@ static bool tcp_write_xmit(struct sock *
+
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+ set_bit(TSQ_THROTTLED, &tp->tsq_flags);
+- break;
++ /* It is possible TX completion already happened
++ * before we set TSQ_THROTTLED, so we must
++ * test again the condition.
++ * We abuse smp_mb__after_clear_bit() because
++ * there is no smp_mb__after_set_bit() yet
++ */
++ smp_mb__after_clear_bit();
++ if (atomic_read(&sk->sk_wmem_alloc) > limit)
++ break;
+ }
+
+ limit = mss_now;
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Nithin Sujir <nsujir@broadcom.com>
+Date: Thu, 6 Feb 2014 14:13:05 -0800
+Subject: tg3: Fix deadlock in tg3_change_mtu()
+
+From: Nithin Sujir <nsujir@broadcom.com>
+
+[ Upstream commit c6993dfd7db9b0c6b7ca7503a56fda9236a4710f ]
+
+Quoting David Vrabel -
+"5780 cards cannot have jumbo frames and TSO enabled together. When
+jumbo frames are enabled by setting the MTU, the TSO feature must be
+cleared. This is done indirectly by calling netdev_update_features()
+which will call tg3_fix_features() to actually clear the flags.
+
+netdev_update_features() will also trigger a new netlink message for the
+feature change event which will result in a call to tg3_get_stats64()
+which deadlocks on the tg3 lock."
+
+tg3_set_mtu() does not need to be under the tg3 lock since the flags
+were converted to use set_bit(). Move it out to after tg3_netif_stop().
+
+Reported-by: David Vrabel <david.vrabel@citrix.com>
+Tested-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Michael Chan <mchan@broadcom.com>
+Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -13965,12 +13965,12 @@ static int tg3_change_mtu(struct net_dev
+
+ tg3_netif_stop(tp);
+
++ tg3_set_mtu(dev, tp, new_mtu);
++
+ tg3_full_lock(tp, 1);
+
+ tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
+
+- tg3_set_mtu(dev, tp, new_mtu);
+-
+ /* Reset PHY, otherwise the read DMA engine will be in a mode that
+ * breaks all requests to 256 bytes.
+ */
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Emil Goode <emilgoode@gmail.com>
+Date: Thu, 13 Feb 2014 17:50:19 +0100
+Subject: usbnet: remove generic hard_header_len check
+
+From: Emil Goode <emilgoode@gmail.com>
+
+[ Upstream commit eb85569fe2d06c2fbf4de7b66c263ca095b397aa ]
+
+This patch removes a generic hard_header_len check from the usbnet
+module that is causing dropped packets under certain circumstances
+for devices that send rx packets that cross urb boundaries.
+
+One example is the AX88772B, which occasionally sends rx packets that
+cross urb boundaries, where the remaining partial packet is sent with
+no hardware header. When such a buffer holds fewer octets than
+hard_header_len, the usbnet module discards it.
+
+With AX88772B this can be reproduced by using ping with a packet
+size between 1965-1976.
+
+The bug has been reported here:
+
+https://bugzilla.kernel.org/show_bug.cgi?id=29082
+
+This patch introduces the following changes:
+- Removes the generic hard_header_len check in the rx_complete
+ function in the usbnet module.
+- Introduces a ETH_HLEN check for skbs that are not cloned from
+ within a rx_fixup callback.
+- For safety a hard_header_len check is added to each rx_fixup
+ callback function that could be affected by this change.
+ These extra checks could possibly be removed by someone
+ who has the hardware to test.
+- Removes a call to dev_kfree_skb_any() and instead utilizes the
+ dev->done list to queue skbs for cleanup.
+
+The changes place full responsibility on the rx_fixup callback
+functions that clone skbs to only pass valid skbs to the
+usbnet_skb_return function.
+
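+A toy sketch of the resulting rx_process() flow (the enum, names and
+simplified signature are illustrative, not the usbnet API):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  #define ETH_HLEN 14
+
+  enum rx_verdict { RX_TO_STACK, RX_TO_DONE_LIST };
+
+  /* Generic length policing is gone; only a minimal ETH_HLEN sanity
+   * check remains for skbs the driver did not clone itself. */
+  static enum rx_verdict rx_process(unsigned int len, bool multi_packet,
+                                    unsigned int *rx_length_errors)
+  {
+      if (multi_packet)             /* driver already cloned all data */
+          return RX_TO_DONE_LIST;
+      if (len < ETH_HLEN) {         /* shorter than an Ethernet header */
+          (*rx_length_errors)++;
+          return RX_TO_DONE_LIST;   /* recycled via the done list */
+      }
+      return RX_TO_STACK;           /* usbnet_skb_return() */
+  }
+
+  int main(void)
+  {
+      unsigned int errs = 0;
+
+      /* a 10-byte partial frame no longer has to clear the driver's
+       * hard_header_len, but still fails the ETH_HLEN floor */
+      printf("%d\n", rx_process(10, false, &errs));  /* 1: done list */
+      printf("%d\n", rx_process(60, false, &errs));  /* 0: to stack */
+      printf("errors: %u\n", errs);
+      return 0;
+  }
+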
+Signed-off-by: Emil Goode <emilgoode@gmail.com>
+Reported-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/ax88179_178a.c | 4 ++++
+ drivers/net/usb/gl620a.c | 4 ++++
+ drivers/net/usb/mcs7830.c | 5 +++--
+ drivers/net/usb/net1080.c | 4 ++++
+ drivers/net/usb/qmi_wwan.c | 8 ++++----
+ drivers/net/usb/rndis_host.c | 4 ++++
+ drivers/net/usb/smsc75xx.c | 4 ++++
+ drivers/net/usb/smsc95xx.c | 4 ++++
+ drivers/net/usb/usbnet.c | 25 ++++++++++---------------
+ 9 files changed, 41 insertions(+), 21 deletions(-)
+
+--- a/drivers/net/usb/ax88179_178a.c
++++ b/drivers/net/usb/ax88179_178a.c
+@@ -1119,6 +1119,10 @@ static int ax88179_rx_fixup(struct usbne
+ u16 hdr_off;
+ u32 *pkt_hdr;
+
++ /* This check is no longer done by usbnet */
++ if (skb->len < dev->net->hard_header_len)
++ return 0;
++
+ skb_trim(skb, skb->len - 4);
+ memcpy(&rx_hdr, skb_tail_pointer(skb), 4);
+ le32_to_cpus(&rx_hdr);
+--- a/drivers/net/usb/gl620a.c
++++ b/drivers/net/usb/gl620a.c
+@@ -86,6 +86,10 @@ static int genelink_rx_fixup(struct usbn
+ u32 size;
+ u32 count;
+
++ /* This check is no longer done by usbnet */
++ if (skb->len < dev->net->hard_header_len)
++ return 0;
++
+ header = (struct gl_header *) skb->data;
+
+ // get the packet count of the received skb
+--- a/drivers/net/usb/mcs7830.c
++++ b/drivers/net/usb/mcs7830.c
+@@ -528,8 +528,9 @@ static int mcs7830_rx_fixup(struct usbne
+ {
+ u8 status;
+
+- if (skb->len == 0) {
+- dev_err(&dev->udev->dev, "unexpected empty rx frame\n");
++ /* This check is no longer done by usbnet */
++ if (skb->len < dev->net->hard_header_len) {
++ dev_err(&dev->udev->dev, "unexpected tiny rx frame\n");
+ return 0;
+ }
+
+--- a/drivers/net/usb/net1080.c
++++ b/drivers/net/usb/net1080.c
+@@ -366,6 +366,10 @@ static int net1080_rx_fixup(struct usbne
+ struct nc_trailer *trailer;
+ u16 hdr_len, packet_len;
+
++ /* This check is no longer done by usbnet */
++ if (skb->len < dev->net->hard_header_len)
++ return 0;
++
+ if (!(skb->len & 0x01)) {
+ netdev_dbg(dev->net, "rx framesize %d range %d..%d mtu %d\n",
+ skb->len, dev->net->hard_header_len, dev->hard_mtu,
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -80,10 +80,10 @@ static int qmi_wwan_rx_fixup(struct usbn
+ {
+ __be16 proto;
+
+- /* usbnet rx_complete guarantees that skb->len is at least
+- * hard_header_len, so we can inspect the dest address without
+- * checking skb->len
+- */
++ /* This check is no longer done by usbnet */
++ if (skb->len < dev->net->hard_header_len)
++ return 0;
++
+ switch (skb->data[0] & 0xf0) {
+ case 0x40:
+ proto = htons(ETH_P_IP);
+--- a/drivers/net/usb/rndis_host.c
++++ b/drivers/net/usb/rndis_host.c
+@@ -494,6 +494,10 @@ EXPORT_SYMBOL_GPL(rndis_unbind);
+ */
+ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+ {
++ /* This check is no longer done by usbnet */
++ if (skb->len < dev->net->hard_header_len)
++ return 0;
++
+ /* peripheral may have batched packets to us... */
+ while (likely(skb->len)) {
+ struct rndis_data_hdr *hdr = (void *)skb->data;
+--- a/drivers/net/usb/smsc75xx.c
++++ b/drivers/net/usb/smsc75xx.c
+@@ -2108,6 +2108,10 @@ static void smsc75xx_rx_csum_offload(str
+
+ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+ {
++ /* This check is no longer done by usbnet */
++ if (skb->len < dev->net->hard_header_len)
++ return 0;
++
+ while (skb->len > 0) {
+ u32 rx_cmd_a, rx_cmd_b, align_count, size;
+ struct sk_buff *ax_skb;
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -1725,6 +1725,10 @@ static void smsc95xx_rx_csum_offload(str
+
+ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+ {
++ /* This check is no longer done by usbnet */
++ if (skb->len < dev->net->hard_header_len)
++ return 0;
++
+ while (skb->len > 0) {
+ u32 header, align_count;
+ struct sk_buff *ax_skb;
+--- a/drivers/net/usb/usbnet.c
++++ b/drivers/net/usb/usbnet.c
+@@ -543,17 +543,19 @@ static inline void rx_process (struct us
+ }
+ // else network stack removes extra byte if we forced a short packet
+
+- if (skb->len) {
+- /* all data was already cloned from skb inside the driver */
+- if (dev->driver_info->flags & FLAG_MULTI_PACKET)
+- dev_kfree_skb_any(skb);
+- else
+- usbnet_skb_return(dev, skb);
++ /* all data was already cloned from skb inside the driver */
++ if (dev->driver_info->flags & FLAG_MULTI_PACKET)
++ goto done;
++
++ if (skb->len < ETH_HLEN) {
++ dev->net->stats.rx_errors++;
++ dev->net->stats.rx_length_errors++;
++ netif_dbg(dev, rx_err, dev->net, "rx length %d\n", skb->len);
++ } else {
++ usbnet_skb_return(dev, skb);
+ return;
+ }
+
+- netif_dbg(dev, rx_err, dev->net, "drop\n");
+- dev->net->stats.rx_errors++;
+ done:
+ skb_queue_tail(&dev->done, skb);
+ }
+@@ -575,13 +577,6 @@ static void rx_complete (struct urb *urb
+ switch (urb_status) {
+ /* success */
+ case 0:
+- if (skb->len < dev->net->hard_header_len) {
+- state = rx_cleanup;
+- dev->net->stats.rx_errors++;
+- dev->net->stats.rx_length_errors++;
+- netif_dbg(dev, rx_err, dev->net,
+- "rx length %d\n", skb->len);
+- }
+ break;
+
+ /* stalls need manual reset. this is rare ... except that
--- /dev/null
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Thu, 13 Feb 2014 11:42:05 +0200
+Subject: vhost: fix ref cnt checking deadlock
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+[ Upstream commit 0ad8b480d6ee916aa84324f69acf690142aecd0e ]
+
+vhost checked the counter within the refcnt before decrementing. It
+really wanted to know whether it holds the last reference, as
+a way to batch freeing resources a bit more efficiently.
+
+Note: we only let refcount go to 0 on device release.
+
+This works well, but we now access the ref counter twice, so there's a
+race: all users might see a high count and decide to defer freeing
+resources.
+In the end no one initiates freeing resources until the last reference
+is gone (which happens on VM shutdown, so it might take a very long time).
+
+Let's do what we probably should have done straight away:
+switch from kref to a plain atomic, document the
+semantics, return the refcount value atomically after decrement,
+then use that to avoid the deadlock.
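+
+A C11-atomics sketch of the resulting put path (ubuf_put and refcount
+are illustrative stand-ins for vhost_net_ubuf_put and
+ubufs->refcount):
+
+  #include <stdatomic.h>
+  #include <stdio.h>
+
+  /* 0: released, 1: no outstanding ubufs, >1: outstanding ubufs */
+  static atomic_int refcount;
+
+  static int ubuf_put(void)
+  {
+      /* act on the value returned by the decrement itself, never on
+       * a separate read of the counter */
+      int r = atomic_fetch_sub(&refcount, 1) - 1;
+      if (r == 0)
+          printf("wake up waiter\n");   /* wake_up(&ubufs->wait) */
+      return r;
+  }
+
+  int main(void)
+  {
+      atomic_store(&refcount, 3);       /* 1 base + 2 outstanding */
+      for (int i = 0; i < 2; i++) {     /* two zerocopy completions */
+          int cnt = ubuf_put();
+          /* guest stopped submitting buffers once cnt reaches 1 */
+          if (cnt <= 1 || !(cnt % 16))
+              printf("vhost_poll_queue()\n");
+      }
+      return 0;
+  }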
+
+Reported-by: Qin Chuanyu <qinchuanyu@huawei.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/net.c | 41 ++++++++++++++++++++---------------------
+ 1 file changed, 20 insertions(+), 21 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -70,7 +70,12 @@ enum {
+ };
+
+ struct vhost_net_ubuf_ref {
+- struct kref kref;
++ /* refcount follows semantics similar to kref:
++ * 0: object is released
++ * 1: no outstanding ubufs
++ * >1: outstanding ubufs
++ */
++ atomic_t refcount;
+ wait_queue_head_t wait;
+ struct vhost_virtqueue *vq;
+ };
+@@ -116,14 +121,6 @@ static void vhost_net_enable_zcopy(int v
+ vhost_net_zcopy_mask |= 0x1 << vq;
+ }
+
+-static void vhost_net_zerocopy_done_signal(struct kref *kref)
+-{
+- struct vhost_net_ubuf_ref *ubufs;
+-
+- ubufs = container_of(kref, struct vhost_net_ubuf_ref, kref);
+- wake_up(&ubufs->wait);
+-}
+-
+ static struct vhost_net_ubuf_ref *
+ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy)
+ {
+@@ -134,21 +131,24 @@ vhost_net_ubuf_alloc(struct vhost_virtqu
+ ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL);
+ if (!ubufs)
+ return ERR_PTR(-ENOMEM);
+- kref_init(&ubufs->kref);
++ atomic_set(&ubufs->refcount, 1);
+ init_waitqueue_head(&ubufs->wait);
+ ubufs->vq = vq;
+ return ubufs;
+ }
+
+-static void vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs)
++static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs)
+ {
+- kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal);
++ int r = atomic_sub_return(1, &ubufs->refcount);
++ if (unlikely(!r))
++ wake_up(&ubufs->wait);
++ return r;
+ }
+
+ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs)
+ {
+- kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal);
+- wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
++ vhost_net_ubuf_put(ubufs);
++ wait_event(ubufs->wait, !atomic_read(&ubufs->refcount));
+ }
+
+ static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs)
+@@ -306,22 +306,21 @@ static void vhost_zerocopy_callback(stru
+ {
+ struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
+ struct vhost_virtqueue *vq = ubufs->vq;
+- int cnt = atomic_read(&ubufs->kref.refcount);
++ int cnt;
+
+ /* set len to mark this desc buffers done DMA */
+ vq->heads[ubuf->desc].len = success ?
+ VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
+- vhost_net_ubuf_put(ubufs);
++ cnt = vhost_net_ubuf_put(ubufs);
+
+ /*
+ * Trigger polling thread if guest stopped submitting new buffers:
+- * in this case, the refcount after decrement will eventually reach 1
+- * so here it is 2.
++ * in this case, the refcount after decrement will eventually reach 1.
+ * We also trigger polling periodically after each 16 packets
+ * (the value 16 here is more or less arbitrary, it's tuned to trigger
+ * less than 10% of times).
+ */
+- if (cnt <= 2 || !(cnt % 16))
++ if (cnt <= 1 || !(cnt % 16))
+ vhost_poll_queue(&vq->poll);
+ }
+
+@@ -420,7 +419,7 @@ static void handle_tx(struct vhost_net *
+ msg.msg_control = ubuf;
+ msg.msg_controllen = sizeof(ubuf);
+ ubufs = nvq->ubufs;
+- kref_get(&ubufs->kref);
++ atomic_inc(&ubufs->refcount);
+ nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
+ } else {
+ msg.msg_control = NULL;
+@@ -785,7 +784,7 @@ static void vhost_net_flush(struct vhost
+ vhost_net_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs);
+ mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
+ n->tx_flush = false;
+- kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref);
++ atomic_set(&n->vqs[VHOST_NET_VQ_TX].ubufs->refcount, 1);
+ mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
+ }
+ }