From: Greg Kroah-Hartman Date: Fri, 28 Feb 2014 04:37:37 +0000 (-0800) Subject: 3.13-stable patches X-Git-Tag: v3.10.33~42 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b72aa78914cf82e0e39cf561869b93f4ed0ea481;p=thirdparty%2Fkernel%2Fstable-queue.git 3.13-stable patches added patches: 6lowpan-fix-lockdep-splats.patch 9p-trans_virtio.c-fix-broken-zero-copy-on-vmalloc-buffers.patch batman-adv-avoid-double-free-when-orig_node-initialization-fails.patch batman-adv-avoid-potential-race-condition-when-adding-a-new-neighbour.patch batman-adv-fix-potential-kernel-paging-error-for-unicast-transmissions.patch batman-adv-fix-potential-orig_node-reference-leak.patch batman-adv-fix-soft-interface-mtu-computation.patch batman-adv-fix-tt-crc-computation-by-ensuring-byte-order.patch batman-adv-fix-tt-tvlv-parsing-on-ogm-reception.patch batman-adv-free-skb-on-tvlv-parsing-success.patch batman-adv-properly-check-pskb_may_pull-return-value.patch batman-adv-release-vlan-object-after-checking-the-crc.patch bonding-802.3ad-make-aggregator_identifier-bond-private.patch bridge-fix-netconsole-setup-over-bridge.patch can-add-destructor-for-self-generated-skbs.patch gre-add-link-local-route-when-local-addr-is-any.patch hyperv-fix-the-carrier-status-setting.patch ipv4-fix-counter-in_slow_tot.patch ipv4-fix-runtime-warning-in-rtmsg_ifa.patch net-add-and-use-skb_gso_transport_seglen.patch net-asix-add-missing-flag-to-struct-driver_info.patch net-core-introduce-netif_skb_dev_features.patch net-fix-ip-rule-iif-oif-device-rename.patch net-ip-ipv6-handle-gso-skbs-in-forwarding-path.patch net-mvneta-add-missing-bit-descriptions-for-interrupt-masks-and-causes.patch net-mvneta-do-not-schedule-in-mvneta_tx_timeout.patch net-mvneta-increase-the-64-bit-rx-tx-stats-out-of-the-hot-path.patch net-mvneta-replace-tx-timer-with-a-real-interrupt.patch net-mvneta-use-per_cpu-stats-to-fix-an-smp-lock-up.patch netpoll-fix-netconsole-ipv6-setup.patch net-qmi_wwan-add-netgear-aircard-340u.patch net-sctp-fix-sctp_connectx-abi-for-ia32-emulation-compat-mode.patch net-use-__gfp_noretry-for-high-order-allocations.patch tcp-tsq-fix-nonagle-handling.patch tg3-fix-deadlock-in-tg3_change_mtu.patch usbnet-remove-generic-hard_header_len-check.patch vhost-fix-ref-cnt-checking-deadlock.patch --- diff --git a/queue-3.13/6lowpan-fix-lockdep-splats.patch b/queue-3.13/6lowpan-fix-lockdep-splats.patch new file mode 100644 index 00000000000..de4e3a5d283 --- /dev/null +++ b/queue-3.13/6lowpan-fix-lockdep-splats.patch @@ -0,0 +1,59 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Eric Dumazet +Date: Mon, 10 Feb 2014 11:42:35 -0800 +Subject: 6lowpan: fix lockdep splats + +From: Eric Dumazet + +[ Upstream commit 20e7c4e80dcd01dad5e6c8b32455228b8fe9c619 ] + +When a device ndo_start_xmit() calls again dev_queue_xmit(), +lockdep can complain because dev_queue_xmit() is re-entered and the +spinlocks protecting tx queues share a common lockdep class. + +Same issue was fixed for bonding/l2tp/ppp in commits + +0daa2303028a6 ("[PATCH] bonding: lockdep annotation") +49ee49202b4ac ("bonding: set qdisc_tx_busylock to avoid LOCKDEP splat") +23d3b8bfb8eb2 ("net: qdisc busylock needs lockdep annotations ") +303c07db487be ("ppp: set qdisc_tx_busylock to avoid LOCKDEP splat ") + +Reported-by: Alexander Aring +Signed-off-by: Eric Dumazet +Tested-by: Alexander Aring +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ieee802154/6lowpan.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +--- a/net/ieee802154/6lowpan.c ++++ b/net/ieee802154/6lowpan.c +@@ -1249,7 +1249,27 @@ static struct header_ops lowpan_header_o + .create = lowpan_header_create, + }; + ++static struct lock_class_key lowpan_tx_busylock; ++static struct lock_class_key lowpan_netdev_xmit_lock_key; ++ ++static void lowpan_set_lockdep_class_one(struct net_device *dev, ++ struct netdev_queue *txq, ++ void *_unused) ++{ ++ lockdep_set_class(&txq->_xmit_lock, ++ &lowpan_netdev_xmit_lock_key); ++} ++ ++ ++static int lowpan_dev_init(struct net_device *dev) ++{ ++ netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); ++ dev->qdisc_tx_busylock = &lowpan_tx_busylock; ++ return 0; ++} ++ + static const struct net_device_ops lowpan_netdev_ops = { ++ .ndo_init = lowpan_dev_init, + .ndo_start_xmit = lowpan_xmit, + .ndo_set_mac_address = lowpan_set_address, + }; diff --git a/queue-3.13/9p-trans_virtio.c-fix-broken-zero-copy-on-vmalloc-buffers.patch b/queue-3.13/9p-trans_virtio.c-fix-broken-zero-copy-on-vmalloc-buffers.patch new file mode 100644 index 00000000000..12402d66601 --- /dev/null +++ b/queue-3.13/9p-trans_virtio.c-fix-broken-zero-copy-on-vmalloc-buffers.patch @@ -0,0 +1,66 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Richard Yao +Date: Sat, 8 Feb 2014 19:32:01 -0500 +Subject: 9p/trans_virtio.c: Fix broken zero-copy on vmalloc() buffers + +From: Richard Yao + +[ Upstream commit b6f52ae2f0d32387bde2b89883e3b64d88b9bfe8 ] + +The 9p-virtio transport does zero copy on things larger than 1024 bytes +in size. It accomplishes this by returning the physical addresses of +pages to the virtio-pci device. At present, the translation is usually a +bit shift. + +That approach produces an invalid page address when we read/write to +vmalloc buffers, such as those used for Linux kernel modules. Any +attempt to load a Linux kernel module from 9p-virtio produces the +following stack. + +[] p9_virtio_zc_request+0x45e/0x510 +[] p9_client_zc_rpc.constprop.16+0xfd/0x4f0 +[] p9_client_read+0x15d/0x240 +[] v9fs_fid_readn+0x50/0xa0 +[] v9fs_file_readn+0x10/0x20 +[] v9fs_file_read+0x37/0x70 +[] vfs_read+0x9b/0x160 +[] kernel_read+0x41/0x60 +[] copy_module_from_fd.isra.34+0xfb/0x180 + +Subsequently, QEMU will die printing: + +qemu-system-x86_64: virtio: trying to map MMIO memory + +This patch enables 9p-virtio to correctly handle this case. This not +only enables us to load Linux kernel modules off virtfs, but also +enables ZFS file-based vdevs on virtfs to be used without killing QEMU. + +Special thanks to both Avi Kivity and Alexander Graf for their +interpretation of QEMU backtraces. Without their guidence, tracking down +this bug would have taken much longer. Also, special thanks to Linus +Torvalds for his insightful explanation of why this should use +is_vmalloc_addr() instead of is_vmalloc_or_module_addr(): + +https://lkml.org/lkml/2014/2/8/272 + +Signed-off-by: Richard Yao +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/9p/trans_virtio.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/9p/trans_virtio.c ++++ b/net/9p/trans_virtio.c +@@ -340,7 +340,10 @@ static int p9_get_mapped_pages(struct vi + int count = nr_pages; + while (nr_pages) { + s = rest_of_page(data); +- pages[index++] = kmap_to_page(data); ++ if (is_vmalloc_addr(data)) ++ pages[index++] = vmalloc_to_page(data); ++ else ++ pages[index++] = kmap_to_page(data); + data += s; + nr_pages--; + } diff --git a/queue-3.13/batman-adv-avoid-double-free-when-orig_node-initialization-fails.patch b/queue-3.13/batman-adv-avoid-double-free-when-orig_node-initialization-fails.patch new file mode 100644 index 00000000000..2954e629a33 --- /dev/null +++ b/queue-3.13/batman-adv-avoid-double-free-when-orig_node-initialization-fails.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Antonio Quartulli +Date: Sat, 15 Feb 2014 02:17:20 +0100 +Subject: batman-adv: avoid double free when orig_node initialization fails + +From: Antonio Quartulli + +[ Upstream commit a5a5cb8cab526af2f6cbe9715f8ca843192f0d81 ] + +In the failure path of the orig_node initialization routine +the orig_node->bat_iv.bcast_own field is free'd twice: first +in batadv_iv_ogm_orig_get() and then later in +batadv_orig_node_free_rcu(). + +Fix it by removing the kfree in batadv_iv_ogm_orig_get(). + +Signed-off-by: Antonio Quartulli +Signed-off-by: Marek Lindner +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/bat_iv_ogm.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/net/batman-adv/bat_iv_ogm.c ++++ b/net/batman-adv/bat_iv_ogm.c +@@ -243,18 +243,16 @@ batadv_iv_ogm_orig_get(struct batadv_pri + size = bat_priv->num_ifaces * sizeof(uint8_t); + orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC); + if (!orig_node->bat_iv.bcast_own_sum) +- goto free_bcast_own; ++ goto free_orig_node; + + hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, + batadv_choose_orig, orig_node, + &orig_node->hash_entry); + if (hash_added != 0) +- goto free_bcast_own; ++ goto free_orig_node; + + return orig_node; + +-free_bcast_own: +- kfree(orig_node->bat_iv.bcast_own); + free_orig_node: + /* free twice, as batadv_orig_node_new sets refcount to 2 */ + batadv_orig_node_free_ref(orig_node); diff --git a/queue-3.13/batman-adv-avoid-potential-race-condition-when-adding-a-new-neighbour.patch b/queue-3.13/batman-adv-avoid-potential-race-condition-when-adding-a-new-neighbour.patch new file mode 100644 index 00000000000..6f57516d7cc --- /dev/null +++ b/queue-3.13/batman-adv-avoid-potential-race-condition-when-adding-a-new-neighbour.patch @@ -0,0 +1,126 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Antonio Quartulli +Date: Wed, 29 Jan 2014 11:25:12 +0100 +Subject: batman-adv: avoid potential race condition when adding a new neighbour + +From: Antonio Quartulli + +[ Upstream commit 08bf0ed29c7ded45c477d08618220dd200c3524a ] + +When adding a new neighbour it is important to atomically +perform the following: +- check if the neighbour already exists +- append the neighbour to the proper list + +If the two operations are not performed in an atomic context +it is possible that two concurrent insertions add the same +neighbour twice. 
+ +Signed-off-by: Antonio Quartulli +Signed-off-by: Marek Lindner +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/bat_iv_ogm.c | 22 ++++++++++++++++------ + net/batman-adv/originator.c | 36 ++++++++++++++++++++++++++++++++++++ + net/batman-adv/originator.h | 4 ++++ + 3 files changed, 56 insertions(+), 6 deletions(-) + +--- a/net/batman-adv/bat_iv_ogm.c ++++ b/net/batman-adv/bat_iv_ogm.c +@@ -268,7 +268,7 @@ batadv_iv_ogm_neigh_new(struct batadv_ha + struct batadv_orig_node *orig_neigh) + { + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); +- struct batadv_neigh_node *neigh_node; ++ struct batadv_neigh_node *neigh_node, *tmp_neigh_node; + + neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node); + if (!neigh_node) +@@ -276,14 +276,24 @@ batadv_iv_ogm_neigh_new(struct batadv_ha + + spin_lock_init(&neigh_node->bat_iv.lq_update_lock); + +- batadv_dbg(BATADV_DBG_BATMAN, bat_priv, +- "Creating new neighbor %pM for orig_node %pM on interface %s\n", +- neigh_addr, orig_node->orig, hard_iface->net_dev->name); +- + spin_lock_bh(&orig_node->neigh_list_lock); +- hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); ++ tmp_neigh_node = batadv_neigh_node_get(orig_node, hard_iface, ++ neigh_addr); ++ if (!tmp_neigh_node) { ++ hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); ++ } else { ++ kfree(neigh_node); ++ batadv_hardif_free_ref(hard_iface); ++ neigh_node = tmp_neigh_node; ++ } + spin_unlock_bh(&orig_node->neigh_list_lock); + ++ if (!tmp_neigh_node) ++ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, ++ "Creating new neighbor %pM for orig_node %pM on interface %s\n", ++ neigh_addr, orig_node->orig, ++ hard_iface->net_dev->name); ++ + out: + return neigh_node; + } +--- a/net/batman-adv/originator.c ++++ b/net/batman-adv/originator.c +@@ -511,6 +511,42 @@ void batadv_purge_orig_ref(struct batadv + _batadv_purge_orig(bat_priv); + } + ++/** ++ * batadv_neigh_node_get - retrieve a neighbour from the list ++ * @orig_node: originator which the neighbour belongs to ++ * @hard_iface: the interface where this neighbour is connected to ++ * @addr: the address of the neighbour ++ * ++ * Looks for and possibly returns a neighbour belonging to this originator list ++ * which is connected through the provided hard interface. ++ * Returns NULL if the neighbour is not found. 
++ */ ++struct batadv_neigh_node * ++batadv_neigh_node_get(const struct batadv_orig_node *orig_node, ++ const struct batadv_hard_iface *hard_iface, ++ const uint8_t *addr) ++{ ++ struct batadv_neigh_node *tmp_neigh_node, *res = NULL; ++ ++ rcu_read_lock(); ++ hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { ++ if (!batadv_compare_eth(tmp_neigh_node->addr, addr)) ++ continue; ++ ++ if (tmp_neigh_node->if_incoming != hard_iface) ++ continue; ++ ++ if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) ++ continue; ++ ++ res = tmp_neigh_node; ++ break; ++ } ++ rcu_read_unlock(); ++ ++ return res; ++} ++ + int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) + { + struct net_device *net_dev = (struct net_device *)seq->private; +--- a/net/batman-adv/originator.h ++++ b/net/batman-adv/originator.h +@@ -31,6 +31,10 @@ void batadv_orig_node_free_ref_now(struc + struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, + const uint8_t *addr); + struct batadv_neigh_node * ++batadv_neigh_node_get(const struct batadv_orig_node *orig_node, ++ const struct batadv_hard_iface *hard_iface, ++ const uint8_t *addr); ++struct batadv_neigh_node * + batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, + const uint8_t *neigh_addr, + struct batadv_orig_node *orig_node); diff --git a/queue-3.13/batman-adv-fix-potential-kernel-paging-error-for-unicast-transmissions.patch b/queue-3.13/batman-adv-fix-potential-kernel-paging-error-for-unicast-transmissions.patch new file mode 100644 index 00000000000..5269e4173b1 --- /dev/null +++ b/queue-3.13/batman-adv-fix-potential-kernel-paging-error-for-unicast-transmissions.patch @@ -0,0 +1,66 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Antonio Quartulli +Date: Sat, 15 Feb 2014 21:50:37 +0100 +Subject: batman-adv: fix potential kernel paging error for unicast transmissions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Antonio Quartulli + +[ Upstream commit 70b271a78beba787155d6696aacd7c4d4a251c50 ] + +batadv_send_skb_prepare_unicast(_4addr) might reallocate the +skb's data. If it does then our ethhdr pointer is not valid +anymore in batadv_send_skb_unicast(), resulting in a kernel +paging error. + +Fixing this by refetching the ethhdr pointer after the +potential reallocation. + +Signed-off-by: Linus Lüssing +Signed-off-by: Antonio Quartulli +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/send.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/batman-adv/send.c ++++ b/net/batman-adv/send.c +@@ -256,9 +256,9 @@ static int batadv_send_skb_unicast(struc + struct batadv_orig_node *orig_node, + unsigned short vid) + { +- struct ethhdr *ethhdr = (struct ethhdr *)skb->data; ++ struct ethhdr *ethhdr; + struct batadv_unicast_packet *unicast_packet; +- int ret = NET_XMIT_DROP; ++ int ret = NET_XMIT_DROP, hdr_size; + + if (!orig_node) + goto out; +@@ -267,12 +267,16 @@ static int batadv_send_skb_unicast(struc + case BATADV_UNICAST: + if (!batadv_send_skb_prepare_unicast(skb, orig_node)) + goto out; ++ ++ hdr_size = sizeof(*unicast_packet); + break; + case BATADV_UNICAST_4ADDR: + if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb, + orig_node, + packet_subtype)) + goto out; ++ ++ hdr_size = sizeof(struct batadv_unicast_4addr_packet); + break; + default: + /* this function supports UNICAST and UNICAST_4ADDR only. 
It +@@ -281,6 +285,7 @@ static int batadv_send_skb_unicast(struc + goto out; + } + ++ ethhdr = (struct ethhdr *)(skb->data + hdr_size); + unicast_packet = (struct batadv_unicast_packet *)skb->data; + + /* inform the destination node that we are still missing a correct route diff --git a/queue-3.13/batman-adv-fix-potential-orig_node-reference-leak.patch b/queue-3.13/batman-adv-fix-potential-orig_node-reference-leak.patch new file mode 100644 index 00000000000..79b36b50c15 --- /dev/null +++ b/queue-3.13/batman-adv-fix-potential-orig_node-reference-leak.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Simon Wunderlich +Date: Sat, 8 Feb 2014 16:45:06 +0100 +Subject: batman-adv: fix potential orig_node reference leak + +From: Simon Wunderlich + +[ Upstream commit b2262df7fcf2c395eca564df83238e931d88d7bf ] + +Since batadv_orig_node_new() sets the refcount to two, assuming that +the calling function will use a reference for putting the orig_node into +a hash or similar, both references must be freed if initialization of +the orig_node fails. Otherwise that object may be leaked in that error +case. + +Reported-by: Antonio Quartulli +Signed-off-by: Simon Wunderlich +Signed-off-by: Marek Lindner +Signed-off-by: Antonio Quartulli +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/bat_iv_ogm.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/batman-adv/bat_iv_ogm.c ++++ b/net/batman-adv/bat_iv_ogm.c +@@ -256,6 +256,8 @@ batadv_iv_ogm_orig_get(struct batadv_pri + free_bcast_own: + kfree(orig_node->bat_iv.bcast_own); + free_orig_node: ++ /* free twice, as batadv_orig_node_new sets refcount to 2 */ ++ batadv_orig_node_free_ref(orig_node); + batadv_orig_node_free_ref(orig_node); + + return NULL; diff --git a/queue-3.13/batman-adv-fix-soft-interface-mtu-computation.patch b/queue-3.13/batman-adv-fix-soft-interface-mtu-computation.patch new file mode 100644 index 00000000000..c4dd29189e7 --- /dev/null +++ b/queue-3.13/batman-adv-fix-soft-interface-mtu-computation.patch @@ -0,0 +1,75 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Antonio Quartulli +Date: Tue, 21 Jan 2014 11:22:05 +0100 +Subject: batman-adv: fix soft-interface MTU computation + +From: Antonio Quartulli + +[ Upstream commit 930cd6e46eadce8b8ed2a232ee536e5fd286c152 ] + +The current MTU computation always returns a value +smaller than 1500bytes even if the real interfaces +have an MTU large enough to compensate the batman-adv +overhead. + +Fix the computation by properly returning the highest +admitted value. 
+ +Introduced by a19d3d85e1b854e4a483a55d740a42458085560d +("batman-adv: limit local translation table max size") + +Reported-by: Russell Senior +Signed-off-by: Antonio Quartulli +Signed-off-by: Marek Lindner +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/hard-interface.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +--- a/net/batman-adv/hard-interface.c ++++ b/net/batman-adv/hard-interface.c +@@ -244,7 +244,7 @@ int batadv_hardif_min_mtu(struct net_dev + { + struct batadv_priv *bat_priv = netdev_priv(soft_iface); + const struct batadv_hard_iface *hard_iface; +- int min_mtu = ETH_DATA_LEN; ++ int min_mtu = INT_MAX; + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { +@@ -259,8 +259,6 @@ int batadv_hardif_min_mtu(struct net_dev + } + rcu_read_unlock(); + +- atomic_set(&bat_priv->packet_size_max, min_mtu); +- + if (atomic_read(&bat_priv->fragmentation) == 0) + goto out; + +@@ -271,13 +269,21 @@ int batadv_hardif_min_mtu(struct net_dev + min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE); + min_mtu -= sizeof(struct batadv_frag_packet); + min_mtu *= BATADV_FRAG_MAX_FRAGMENTS; +- atomic_set(&bat_priv->packet_size_max, min_mtu); +- +- /* with fragmentation enabled we can fragment external packets easily */ +- min_mtu = min_t(int, min_mtu, ETH_DATA_LEN); + + out: +- return min_mtu - batadv_max_header_len(); ++ /* report to the other components the maximum amount of bytes that ++ * batman-adv can send over the wire (without considering the payload ++ * overhead). For example, this value is used by TT to compute the ++ * maximum local table table size ++ */ ++ atomic_set(&bat_priv->packet_size_max, min_mtu); ++ ++ /* the real soft-interface MTU is computed by removing the payload ++ * overhead from the maximum amount of bytes that was just computed. ++ * ++ * However batman-adv does not support MTUs bigger than ETH_DATA_LEN ++ */ ++ return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN); + } + + /* adjusts the MTU if a new interface with a smaller MTU appeared. */ diff --git a/queue-3.13/batman-adv-fix-tt-crc-computation-by-ensuring-byte-order.patch b/queue-3.13/batman-adv-fix-tt-crc-computation-by-ensuring-byte-order.patch new file mode 100644 index 00000000000..5a0ccb24bee --- /dev/null +++ b/queue-3.13/batman-adv-fix-tt-crc-computation-by-ensuring-byte-order.patch @@ -0,0 +1,81 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Antonio Quartulli +Date: Tue, 11 Feb 2014 17:05:06 +0100 +Subject: batman-adv: fix TT CRC computation by ensuring byte order + +From: Antonio Quartulli + +[ Upstream commit a30e22ca8464c2dc573e0144a972221c2f06c2cd ] + +When computing the CRC on a 2byte variable the order of +the bytes obviously alters the final result. This means +that computing the CRC over the same value on two archs +having different endianess leads to different numbers. + +The global and local translation table CRC computation +routine makes this mistake while processing the clients +VIDs. The result is a continuous CRC mismatching between +nodes having different endianess. + +Fix this by converting the VID to Network Order before +processing it. This guarantees that every node uses the same +byte order. 
+ +Introduced by 7ea7b4a142758deaf46c1af0ca9ceca6dd55138b +("batman-adv: make the TT CRC logic VLAN specific") + +Reported-by: Russel Senior +Signed-off-by: Antonio Quartulli +Tested-by: Russell Senior +Signed-off-by: Marek Lindner +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/translation-table.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -1961,6 +1961,7 @@ static uint32_t batadv_tt_global_crc(str + struct hlist_head *head; + uint32_t i, crc_tmp, crc = 0; + uint8_t flags; ++ __be16 tmp_vid; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; +@@ -1997,8 +1998,11 @@ static uint32_t batadv_tt_global_crc(str + orig_node)) + continue; + +- crc_tmp = crc32c(0, &tt_common->vid, +- sizeof(tt_common->vid)); ++ /* use network order to read the VID: this ensures that ++ * every node reads the bytes in the same order. ++ */ ++ tmp_vid = htons(tt_common->vid); ++ crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid)); + + /* compute the CRC on flags that have to be kept in sync + * among nodes +@@ -2032,6 +2036,7 @@ static uint32_t batadv_tt_local_crc(stru + struct hlist_head *head; + uint32_t i, crc_tmp, crc = 0; + uint8_t flags; ++ __be16 tmp_vid; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; +@@ -2050,8 +2055,11 @@ static uint32_t batadv_tt_local_crc(stru + if (tt_common->flags & BATADV_TT_CLIENT_NEW) + continue; + +- crc_tmp = crc32c(0, &tt_common->vid, +- sizeof(tt_common->vid)); ++ /* use network order to read the VID: this ensures that ++ * every node reads the bytes in the same order. ++ */ ++ tmp_vid = htons(tt_common->vid); ++ crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid)); + + /* compute the CRC on flags that have to be kept in sync + * among nodes diff --git a/queue-3.13/batman-adv-fix-tt-tvlv-parsing-on-ogm-reception.patch b/queue-3.13/batman-adv-fix-tt-tvlv-parsing-on-ogm-reception.patch new file mode 100644 index 00000000000..caa86739809 --- /dev/null +++ b/queue-3.13/batman-adv-fix-tt-tvlv-parsing-on-ogm-reception.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Antonio Quartulli +Date: Mon, 27 Jan 2014 12:23:28 +0100 +Subject: batman-adv: fix TT-TVLV parsing on OGM reception + +From: Antonio Quartulli + +[ Upstream commit e889241f45f9cecbc84a6ffed577083ab52e62ee ] + +When accessing a TT-TVLV container in the OGM RX path +the variable pointing to the list of changes to apply is +altered by mistake. + +This makes the TT component read data at the wrong position +in the OGM packet buffer. + +Fix it by removing the bogus pointer alteration. 
+ +Signed-off-by: Antonio Quartulli +Signed-off-by: Marek Lindner +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/translation-table.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -3204,7 +3204,6 @@ static void batadv_tt_update_orig(struct + + spin_lock_bh(&orig_node->tt_lock); + +- tt_change = (struct batadv_tvlv_tt_change *)tt_buff; + batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes, + ttvn, tt_change); + diff --git a/queue-3.13/batman-adv-free-skb-on-tvlv-parsing-success.patch b/queue-3.13/batman-adv-free-skb-on-tvlv-parsing-success.patch new file mode 100644 index 00000000000..5e78ba2c1e1 --- /dev/null +++ b/queue-3.13/batman-adv-free-skb-on-tvlv-parsing-success.patch @@ -0,0 +1,37 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Antonio Quartulli +Date: Tue, 11 Feb 2014 17:05:07 +0100 +Subject: batman-adv: free skb on TVLV parsing success + +From: Antonio Quartulli + +[ Upstream commit 05c3c8a636aa9ee35ce13f65afc5b665615cc786 ] + +When the TVLV parsing routine succeed the skb is left +untouched thus leading to a memory leak. + +Fix this by consuming the skb in case of success. + +Introduced by ef26157747d42254453f6b3ac2bd8bd3c53339c3 +("batman-adv: tvlv - basic infrastructure") + +Reported-by: Russel Senior +Signed-off-by: Antonio Quartulli +Tested-by: Russell Senior +Signed-off-by: Marek Lindner +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/routing.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/batman-adv/routing.c ++++ b/net/batman-adv/routing.c +@@ -1063,6 +1063,8 @@ int batadv_recv_unicast_tvlv(struct sk_b + + if (ret != NET_RX_SUCCESS) + ret = batadv_route_unicast_packet(skb, recv_if); ++ else ++ consume_skb(skb); + + return ret; + } diff --git a/queue-3.13/batman-adv-properly-check-pskb_may_pull-return-value.patch b/queue-3.13/batman-adv-properly-check-pskb_may_pull-return-value.patch new file mode 100644 index 00000000000..94a447040ec --- /dev/null +++ b/queue-3.13/batman-adv-properly-check-pskb_may_pull-return-value.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Antonio Quartulli +Date: Thu, 30 Jan 2014 00:12:24 +0100 +Subject: batman-adv: properly check pskb_may_pull return value + +From: Antonio Quartulli + +[ Upstream commit f1791425cf0bcda43ab9a9a37df1ad3ccb1f6654 ] + +pskb_may_pull() returns 1 on success and 0 in case of failure, +therefore checking for the return value being negative does +not make sense at all. + +This way if the function fails we will probably read beyond the current +skb data buffer. Fix this by doing the proper check. + +Signed-off-by: Antonio Quartulli +Signed-off-by: Marek Lindner +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/routing.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/batman-adv/routing.c ++++ b/net/batman-adv/routing.c +@@ -833,7 +833,7 @@ static int batadv_check_unicast_ttvn(str + int is_old_ttvn; + + /* check if there is enough data before accessing it */ +- if (pskb_may_pull(skb, hdr_len + ETH_HLEN) < 0) ++ if (!pskb_may_pull(skb, hdr_len + ETH_HLEN)) + return 0; + + /* create a copy of the skb (in case of for re-routing) to modify it. 
*/ diff --git a/queue-3.13/batman-adv-release-vlan-object-after-checking-the-crc.patch b/queue-3.13/batman-adv-release-vlan-object-after-checking-the-crc.patch new file mode 100644 index 00000000000..bba8e3354a6 --- /dev/null +++ b/queue-3.13/batman-adv-release-vlan-object-after-checking-the-crc.patch @@ -0,0 +1,50 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Antonio Quartulli +Date: Tue, 28 Jan 2014 02:06:47 +0100 +Subject: batman-adv: release vlan object after checking the CRC + +From: Antonio Quartulli + +[ Upstream commit 91c2b1a9f680ff105369d49abc7e19ca7efb33e1 ] + +There is a refcounter unbalance in the CRC checking routine +invoked on OGM reception. A vlan object is retrieved (thus +its refcounter is increased by one) but it is never properly +released. This leads to a memleak because the vlan object +will never be free'd. + +Fix this by releasing the vlan object after having read the +CRC. + +Reported-by: Russell Senior +Reported-by: Daniel +Reported-by: cmsv +Signed-off-by: Antonio Quartulli +Signed-off-by: Marek Lindner +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/translation-table.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -2248,6 +2248,7 @@ static bool batadv_tt_global_check_crc(s + { + struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp; + struct batadv_orig_node_vlan *vlan; ++ uint32_t crc; + int i; + + /* check if each received CRC matches the locally stored one */ +@@ -2267,7 +2268,10 @@ static bool batadv_tt_global_check_crc(s + if (!vlan) + return false; + +- if (vlan->tt.crc != ntohl(tt_vlan_tmp->crc)) ++ crc = vlan->tt.crc; ++ batadv_orig_node_vlan_free_ref(vlan); ++ ++ if (crc != ntohl(tt_vlan_tmp->crc)) + return false; + } + diff --git a/queue-3.13/bonding-802.3ad-make-aggregator_identifier-bond-private.patch b/queue-3.13/bonding-802.3ad-make-aggregator_identifier-bond-private.patch new file mode 100644 index 00000000000..0ad597b2152 --- /dev/null +++ b/queue-3.13/bonding-802.3ad-make-aggregator_identifier-bond-private.patch @@ -0,0 +1,77 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Jiri Bohac +Date: Fri, 14 Feb 2014 18:13:50 +0100 +Subject: bonding: 802.3ad: make aggregator_identifier bond-private + +From: Jiri Bohac + +[ Upstream commit 163c8ff30dbe473abfbb24a7eac5536c87f3baa9 ] + +aggregator_identifier is used to assign unique aggregator identifiers +to aggregators of a bond during device enslaving. + +aggregator_identifier is currently a global variable that is zeroed in +bond_3ad_initialize(). + +This sequence will lead to duplicate aggregator identifiers for eth1 and eth3: + +create bond0 +change bond0 mode to 802.3ad +enslave eth0 to bond0 //eth0 gets agg id 1 +enslave eth1 to bond0 //eth1 gets agg id 2 +create bond1 +change bond1 mode to 802.3ad +enslave eth2 to bond1 //aggregator_identifier is reset to 0 + //eth2 gets agg id 1 +enslave eth3 to bond0 //eth3 gets agg id 2 + +Fix this by making aggregator_identifier private to the bond. + +Signed-off-by: Jiri Bohac +Acked-by: Veaceslav Falico +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_3ad.c | 6 ++---- + drivers/net/bonding/bond_3ad.h | 1 + + 2 files changed, 3 insertions(+), 4 deletions(-) + +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -1806,8 +1806,6 @@ void bond_3ad_initiate_agg_selection(str + BOND_AD_INFO(bond).agg_select_timer = timeout; + } + +-static u16 aggregator_identifier; +- + /** + * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures + * @bond: bonding struct to work on +@@ -1821,7 +1819,7 @@ void bond_3ad_initialize(struct bonding + if (MAC_ADDRESS_COMPARE(&(BOND_AD_INFO(bond).system.sys_mac_addr), + bond->dev->dev_addr)) { + +- aggregator_identifier = 0; ++ BOND_AD_INFO(bond).aggregator_identifier = 0; + + BOND_AD_INFO(bond).system.sys_priority = 0xFFFF; + BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); +@@ -1892,7 +1890,7 @@ int bond_3ad_bind_slave(struct slave *sl + ad_initialize_agg(aggregator); + + aggregator->aggregator_mac_address = *((struct mac_addr *)bond->dev->dev_addr); +- aggregator->aggregator_identifier = (++aggregator_identifier); ++ aggregator->aggregator_identifier = ++BOND_AD_INFO(bond).aggregator_identifier; + aggregator->slave = slave; + aggregator->is_active = 0; + aggregator->num_of_ports = 0; +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -253,6 +253,7 @@ struct ad_system { + struct ad_bond_info { + struct ad_system system; /* 802.3ad system structure */ + u32 agg_select_timer; // Timer to select aggregator after all adapter's hand shakes ++ u16 aggregator_identifier; + }; + + struct ad_slave_info { diff --git a/queue-3.13/bridge-fix-netconsole-setup-over-bridge.patch b/queue-3.13/bridge-fix-netconsole-setup-over-bridge.patch new file mode 100644 index 00000000000..eef61b0b18d --- /dev/null +++ b/queue-3.13/bridge-fix-netconsole-setup-over-bridge.patch @@ -0,0 +1,103 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Cong Wang +Date: Thu, 6 Feb 2014 15:00:52 -0800 +Subject: bridge: fix netconsole setup over bridge + +From: Cong Wang + +[ Upstream commit dbe173079ab58a444e12dbebe96f5aec1e0bed1a ] + +Commit 93d8bf9fb8f3 ("bridge: cleanup netpoll code") introduced +a check in br_netpoll_enable(), but this check is incorrect for +br_netpoll_setup(). This patch moves the code after the check +into __br_netpoll_enable() and calls it in br_netpoll_setup(). +For br_add_if(), the check is still needed. + +Fixes: 93d8bf9fb8f3 ("bridge: cleanup netpoll code") +Cc: Toshiaki Makita +Cc: Stephen Hemminger +Cc: David S. Miller +Signed-off-by: Cong Wang +Signed-off-by: Cong Wang +Acked-by: Toshiaki Makita +Tested-by: Toshiaki Makita +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_device.c | 51 ++++++++++++++++++++++++++----------------------- + 1 file changed, 28 insertions(+), 23 deletions(-) + +--- a/net/bridge/br_device.c ++++ b/net/bridge/br_device.c +@@ -226,6 +226,33 @@ static void br_netpoll_cleanup(struct ne + br_netpoll_disable(p); + } + ++static int __br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) ++{ ++ struct netpoll *np; ++ int err; ++ ++ np = kzalloc(sizeof(*p->np), gfp); ++ if (!np) ++ return -ENOMEM; ++ ++ err = __netpoll_setup(np, p->dev, gfp); ++ if (err) { ++ kfree(np); ++ return err; ++ } ++ ++ p->np = np; ++ return err; ++} ++ ++int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) ++{ ++ if (!p->br->dev->npinfo) ++ return 0; ++ ++ return __br_netpoll_enable(p, gfp); ++} ++ + static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, + gfp_t gfp) + { +@@ -236,7 +263,7 @@ static int br_netpoll_setup(struct net_d + list_for_each_entry(p, &br->port_list, list) { + if (!p->dev) + continue; +- err = br_netpoll_enable(p, gfp); ++ err = __br_netpoll_enable(p, gfp); + if (err) + goto fail; + } +@@ -249,28 +276,6 @@ fail: + goto out; + } + +-int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) +-{ +- struct netpoll *np; +- int err; +- +- if (!p->br->dev->npinfo) +- return 0; +- +- np = kzalloc(sizeof(*p->np), gfp); +- if (!np) +- return -ENOMEM; +- +- err = __netpoll_setup(np, p->dev, gfp); +- if (err) { +- kfree(np); +- return err; +- } +- +- p->np = np; +- return err; +-} +- + void br_netpoll_disable(struct net_bridge_port *p) + { + struct netpoll *np = p->np; diff --git a/queue-3.13/can-add-destructor-for-self-generated-skbs.patch b/queue-3.13/can-add-destructor-for-self-generated-skbs.patch new file mode 100644 index 00000000000..910b7ae6dde --- /dev/null +++ b/queue-3.13/can-add-destructor-for-self-generated-skbs.patch @@ -0,0 +1,222 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Oliver Hartkopp +Date: Thu, 30 Jan 2014 10:11:28 +0100 +Subject: can: add destructor for self generated skbs + +From: Oliver Hartkopp + +[ Upstream commit 0ae89beb283a0db5980d1d4781c7d7be2f2810d6 ] + +Self generated skbuffs in net/can/bcm.c are setting a skb->sk reference but +no explicit destructor which is enforced since Linux 3.11 with commit +376c7311bdb6 (net: add a temporary sanity check in skb_orphan()). + +This patch adds some helper functions to make sure that a destructor is +properly defined when a sock reference is assigned to a CAN related skb. +To create an unshared skb owned by the original sock a common helper function +has been introduced to replace open coded functions to create CAN echo skbs. + +Signed-off-by: Oliver Hartkopp +Tested-by: Andre Naujoks +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/dev.c | 15 +++------------ + drivers/net/can/janz-ican3.c | 18 ++++-------------- + drivers/net/can/vcan.c | 9 ++++----- + include/linux/can/skb.h | 38 ++++++++++++++++++++++++++++++++++++++ + net/can/af_can.c | 3 ++- + net/can/bcm.c | 4 ++-- + 6 files changed, 53 insertions(+), 34 deletions(-) + +--- a/drivers/net/can/dev.c ++++ b/drivers/net/can/dev.c +@@ -324,19 +324,10 @@ void can_put_echo_skb(struct sk_buff *sk + } + + if (!priv->echo_skb[idx]) { +- struct sock *srcsk = skb->sk; + +- if (atomic_read(&skb->users) != 1) { +- struct sk_buff *old_skb = skb; +- +- skb = skb_clone(old_skb, GFP_ATOMIC); +- kfree_skb(old_skb); +- if (!skb) +- return; +- } else +- skb_orphan(skb); +- +- skb->sk = srcsk; ++ skb = can_create_echo_skb(skb); ++ if (!skb) ++ return; + + /* make settings for echo to reduce code in irq context */ + skb->protocol = htons(ETH_P_CAN); +--- a/drivers/net/can/janz-ican3.c ++++ b/drivers/net/can/janz-ican3.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -1134,20 +1135,9 @@ static void ican3_handle_message(struct + */ + static void ican3_put_echo_skb(struct ican3_dev *mod, struct sk_buff *skb) + { +- struct sock *srcsk = skb->sk; +- +- if (atomic_read(&skb->users) != 1) { +- struct sk_buff *old_skb = skb; +- +- skb = skb_clone(old_skb, GFP_ATOMIC); +- kfree_skb(old_skb); +- if (!skb) +- return; +- } else { +- skb_orphan(skb); +- } +- +- skb->sk = srcsk; ++ skb = can_create_echo_skb(skb); ++ if (!skb) ++ return; + + /* save this skb for tx interrupt echo handling */ + skb_queue_tail(&mod->echoq, skb); +--- a/drivers/net/can/vcan.c ++++ b/drivers/net/can/vcan.c +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -109,25 +110,23 @@ static netdev_tx_t vcan_tx(struct sk_buf + stats->rx_packets++; + stats->rx_bytes += cfd->len; + } +- kfree_skb(skb); ++ consume_skb(skb); + return NETDEV_TX_OK; + } + + /* perform standard echo handling for CAN network interfaces */ + + if (loop) { +- struct sock *srcsk = skb->sk; + +- skb = skb_share_check(skb, GFP_ATOMIC); ++ skb = can_create_echo_skb(skb); + if (!skb) + return NETDEV_TX_OK; + + /* receive with packet counting */ +- skb->sk = srcsk; + vcan_rx(skb, dev); + } else { + /* no looped packets => no counting */ +- kfree_skb(skb); ++ consume_skb(skb); + } + return NETDEV_TX_OK; + } +--- a/include/linux/can/skb.h ++++ b/include/linux/can/skb.h +@@ -11,7 +11,9 @@ + #define CAN_SKB_H + + #include ++#include + #include ++#include + + /* + * The struct can_skb_priv is used to transport additional information along +@@ -42,4 +44,40 @@ static inline void can_skb_reserve(struc + skb_reserve(skb, sizeof(struct can_skb_priv)); + } + ++static inline void can_skb_destructor(struct sk_buff *skb) ++{ ++ sock_put(skb->sk); ++} ++ ++static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) ++{ ++ if (sk) { ++ sock_hold(sk); ++ skb->destructor = can_skb_destructor; ++ skb->sk = sk; ++ } ++} ++ ++/* ++ * returns an unshared skb owned by the original sock to be echo'ed back ++ */ ++static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) ++{ ++ if (skb_shared(skb)) { ++ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); ++ ++ if (likely(nskb)) { ++ can_skb_set_owner(nskb, skb->sk); ++ consume_skb(skb); ++ return nskb; ++ } else { ++ kfree_skb(skb); ++ return NULL; ++ } ++ } ++ ++ /* we can assume to have an unshared skb with proper owner */ ++ return skb; ++} ++ + #endif /* 
CAN_SKB_H */ +--- a/net/can/af_can.c ++++ b/net/can/af_can.c +@@ -57,6 +57,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -290,7 +291,7 @@ int can_send(struct sk_buff *skb, int lo + return -ENOMEM; + } + +- newskb->sk = skb->sk; ++ can_skb_set_owner(newskb, skb->sk); + newskb->ip_summed = CHECKSUM_UNNECESSARY; + newskb->pkt_type = PACKET_BROADCAST; + } +--- a/net/can/bcm.c ++++ b/net/can/bcm.c +@@ -268,7 +268,7 @@ static void bcm_can_tx(struct bcm_op *op + + /* send with loopback */ + skb->dev = dev; +- skb->sk = op->sk; ++ can_skb_set_owner(skb, op->sk); + can_send(skb, 1); + + /* update statistics */ +@@ -1223,7 +1223,7 @@ static int bcm_tx_send(struct msghdr *ms + + can_skb_prv(skb)->ifindex = dev->ifindex; + skb->dev = dev; +- skb->sk = sk; ++ can_skb_set_owner(skb, sk); + err = can_send(skb, 1); /* send with loopback */ + dev_put(dev); + diff --git a/queue-3.13/gre-add-link-local-route-when-local-addr-is-any.patch b/queue-3.13/gre-add-link-local-route-when-local-addr-is-any.patch new file mode 100644 index 00000000000..9383db2037f --- /dev/null +++ b/queue-3.13/gre-add-link-local-route-when-local-addr-is-any.patch @@ -0,0 +1,48 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Nicolas Dichtel +Date: Mon, 17 Feb 2014 14:22:21 +0100 +Subject: gre: add link local route when local addr is any + +From: Nicolas Dichtel + +[ Upstream commit 08b44656c08c8c2f73cdac2a058be2880e3361f2 ] + +This bug was reported by Steinar H. Gunderson and was introduced by commit +f7cb8886335d ("sit/gre6: don't try to add the same route two times"). + +root@morgental:~# ip tunnel add foo mode gre remote 1.2.3.4 ttl 64 +root@morgental:~# ip link set foo up mtu 1468 +root@morgental:~# ip -6 route show dev foo +fe80::/64 proto kernel metric 256 + +but after the above commit, no such route shows up. + +There is no link local route because dev->dev_addr is 0 (because local ipv4 +address is 0), hence no link local address is configured. + +In this scenario, the link local address is added manually: 'ip -6 addr add +fe80::1 dev foo' and because prefix is /128, no link local route is added by the +kernel. + +Even if the right things to do is to add the link local address with a /64 +prefix, we need to restore the previous behavior to avoid breaking userpace. + +Reported-by: Steinar H. Gunderson +Signed-off-by: Nicolas Dichtel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -2726,6 +2726,8 @@ static void addrconf_gre_config(struct n + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) + addrconf_add_linklocal(idev, &addr); ++ else ++ addrconf_prefix_route(&addr, 64, dev, 0, 0); + } + #endif + diff --git a/queue-3.13/hyperv-fix-the-carrier-status-setting.patch b/queue-3.13/hyperv-fix-the-carrier-status-setting.patch new file mode 100644 index 00000000000..19d825a4970 --- /dev/null +++ b/queue-3.13/hyperv-fix-the-carrier-status-setting.patch @@ -0,0 +1,145 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Haiyang Zhang +Date: Wed, 12 Feb 2014 16:54:27 -0800 +Subject: hyperv: Fix the carrier status setting + +From: Haiyang Zhang + +[ Upstream commit 891de74d693bb4fefe2efcc6432a4a9a9bee561e ] + +Without this patch, the "cat /sys/class/net/ethN/operstate" shows +"unknown", and "ethtool ethN" shows "Link detected: yes", when VM +boots up with or without vNIC connected. 
+ +This patch fixed the problem. + +Signed-off-by: Haiyang Zhang +Reviewed-by: K. Y. Srinivasan +Acked-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 53 ++++++++++++++++++++++++++++------------ + 1 file changed, 38 insertions(+), 15 deletions(-) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -89,8 +89,12 @@ static int netvsc_open(struct net_device + { + struct net_device_context *net_device_ctx = netdev_priv(net); + struct hv_device *device_obj = net_device_ctx->device_ctx; ++ struct netvsc_device *nvdev; ++ struct rndis_device *rdev; + int ret = 0; + ++ netif_carrier_off(net); ++ + /* Open up the device */ + ret = rndis_filter_open(device_obj); + if (ret != 0) { +@@ -100,6 +104,11 @@ static int netvsc_open(struct net_device + + netif_start_queue(net); + ++ nvdev = hv_get_drvdata(device_obj); ++ rdev = nvdev->extension; ++ if (!rdev->link_state) ++ netif_carrier_on(net); ++ + return ret; + } + +@@ -230,23 +239,24 @@ void netvsc_linkstatus_callback(struct h + struct net_device *net; + struct net_device_context *ndev_ctx; + struct netvsc_device *net_device; ++ struct rndis_device *rdev; + + net_device = hv_get_drvdata(device_obj); ++ rdev = net_device->extension; ++ ++ rdev->link_state = status != 1; ++ + net = net_device->ndev; + +- if (!net) { +- netdev_err(net, "got link status but net device " +- "not initialized yet\n"); ++ if (!net || net->reg_state != NETREG_REGISTERED) + return; +- } + ++ ndev_ctx = netdev_priv(net); + if (status == 1) { +- netif_carrier_on(net); +- ndev_ctx = netdev_priv(net); + schedule_delayed_work(&ndev_ctx->dwork, 0); + schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); + } else { +- netif_carrier_off(net); ++ schedule_delayed_work(&ndev_ctx->dwork, 0); + } + } + +@@ -389,17 +399,35 @@ static const struct net_device_ops devic + * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add + * another netif_notify_peers() into a delayed work, otherwise GARP packet + * will not be sent after quick migration, and cause network disconnection. ++ * Also, we update the carrier status here. 
+ */ +-static void netvsc_send_garp(struct work_struct *w) ++static void netvsc_link_change(struct work_struct *w) + { + struct net_device_context *ndev_ctx; + struct net_device *net; + struct netvsc_device *net_device; ++ struct rndis_device *rdev; ++ bool notify; ++ ++ rtnl_lock(); + + ndev_ctx = container_of(w, struct net_device_context, dwork.work); + net_device = hv_get_drvdata(ndev_ctx->device_ctx); ++ rdev = net_device->extension; + net = net_device->ndev; +- netdev_notify_peers(net); ++ ++ if (rdev->link_state) { ++ netif_carrier_off(net); ++ notify = false; ++ } else { ++ netif_carrier_on(net); ++ notify = true; ++ } ++ ++ rtnl_unlock(); ++ ++ if (notify) ++ netdev_notify_peers(net); + } + + +@@ -415,13 +443,10 @@ static int netvsc_probe(struct hv_device + if (!net) + return -ENOMEM; + +- /* Set initial state */ +- netif_carrier_off(net); +- + net_device_ctx = netdev_priv(net); + net_device_ctx->device_ctx = dev; + hv_set_drvdata(dev, net); +- INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp); ++ INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); + INIT_WORK(&net_device_ctx->work, do_set_multicast); + + net->netdev_ops = &device_ops; +@@ -444,8 +469,6 @@ static int netvsc_probe(struct hv_device + } + memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + +- netif_carrier_on(net); +- + ret = register_netdev(net); + if (ret != 0) { + pr_err("Unable to register netdev.\n"); diff --git a/queue-3.13/ipv4-fix-counter-in_slow_tot.patch b/queue-3.13/ipv4-fix-counter-in_slow_tot.patch new file mode 100644 index 00000000000..7ebbbf31599 --- /dev/null +++ b/queue-3.13/ipv4-fix-counter-in_slow_tot.patch @@ -0,0 +1,50 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Duan Jiong +Date: Mon, 17 Feb 2014 15:23:43 +0800 +Subject: ipv4: fix counter in_slow_tot + +From: Duan Jiong + +[ Upstream commit a6254864c08109c66a194612585afc0439005286 ] + +since commit 89aef8921bf("ipv4: Delete routing cache."), the counter +in_slow_tot can't work correctly. + +The counter in_slow_tot increase by one when fib_lookup() return successfully +in ip_route_input_slow(), but actually the dst struct maybe not be created and +cached, so we can increase in_slow_tot after the dst struct is created. + +Signed-off-by: Duan Jiong +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1600,6 +1600,7 @@ static int __mkroute_input(struct sk_buf + rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); ++ RT_CACHE_STAT_INC(in_slow_tot); + + rth->dst.input = ip_forward; + rth->dst.output = ip_output; +@@ -1701,8 +1702,6 @@ static int ip_route_input_slow(struct sk + if (err != 0) + goto no_route; + +- RT_CACHE_STAT_INC(in_slow_tot); +- + if (res.type == RTN_BROADCAST) + goto brd_input; + +@@ -1771,6 +1770,7 @@ local_input: + rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); ++ RT_CACHE_STAT_INC(in_slow_tot); + if (res.type == RTN_UNREACHABLE) { + rth->dst.input= ip_error; + rth->dst.error= -err; diff --git a/queue-3.13/ipv4-fix-runtime-warning-in-rtmsg_ifa.patch b/queue-3.13/ipv4-fix-runtime-warning-in-rtmsg_ifa.patch new file mode 100644 index 00000000000..35a83457258 --- /dev/null +++ b/queue-3.13/ipv4-fix-runtime-warning-in-rtmsg_ifa.patch @@ -0,0 +1,65 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Geert Uytterhoeven +Date: Wed, 5 Feb 2014 08:38:25 +0100 +Subject: ipv4: Fix runtime WARNING in rtmsg_ifa() + +From: Geert Uytterhoeven + +[ Upstream commit 63b5f152eb4a5bb79b9caf7ec37b4201d12f6e66 ] + +On m68k/ARAnyM: + +WARNING: CPU: 0 PID: 407 at net/ipv4/devinet.c:1599 0x316a99() +Modules linked in: +CPU: 0 PID: 407 Comm: ifconfig Not tainted +3.13.0-atari-09263-g0c71d68014d1 #1378 +Stack from 10c4fdf0: + 10c4fdf0 002ffabb 000243e8 00000000 008ced6c 00024416 00316a99 0000063f + 00316a99 00000009 00000000 002501b4 00316a99 0000063f c0a86117 00000080 + c0a86117 00ad0c90 00250a5a 00000014 00ad0c90 00000000 00000000 00000001 + 00b02dd0 00356594 00000000 00356594 c0a86117 eff6c9e4 008ced6c 00000002 + 008ced60 0024f9b4 00250b52 00ad0c90 00000000 00000000 00252390 00ad0c90 + eff6c9e4 0000004f 00000000 00000000 eff6c9e4 8000e25c eff6c9e4 80001020 +Call Trace: [<000243e8>] warn_slowpath_common+0x52/0x6c + [<00024416>] warn_slowpath_null+0x14/0x1a + [<002501b4>] rtmsg_ifa+0xdc/0xf0 + [<00250a5a>] __inet_insert_ifa+0xd6/0x1c2 + [<0024f9b4>] inet_abc_len+0x0/0x42 + [<00250b52>] inet_insert_ifa+0xc/0x12 + [<00252390>] devinet_ioctl+0x2ae/0x5d6 + +Adding some debugging code reveals that net_fill_ifaddr() fails in + + put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, + preferred, valid)) + +nla_put complains: + + lib/nlattr.c:454: skb_tailroom(skb) = 12, nla_total_size(attrlen) = 20 + +Apparently commit 5c766d642bcaffd0c2a5b354db2068515b3846cf ("ipv4: +introduce address lifetime") forgot to take into account the addition of +struct ifa_cacheinfo in inet_nlmsg_size(). Hence add it, like is already +done for ipv6. + +Suggested-by: Cong Wang +Signed-off-by: Geert Uytterhoeven +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/devinet.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/devinet.c ++++ b/net/ipv4/devinet.c +@@ -1435,7 +1435,8 @@ static size_t inet_nlmsg_size(void) + + nla_total_size(4) /* IFA_ADDRESS */ + + nla_total_size(4) /* IFA_LOCAL */ + + nla_total_size(4) /* IFA_BROADCAST */ +- + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ ++ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ ++ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ + } + + static inline u32 cstamp_delta(unsigned long cstamp) diff --git a/queue-3.13/net-add-and-use-skb_gso_transport_seglen.patch b/queue-3.13/net-add-and-use-skb_gso_transport_seglen.patch new file mode 100644 index 00000000000..0f1447ff16d --- /dev/null +++ b/queue-3.13/net-add-and-use-skb_gso_transport_seglen.patch @@ -0,0 +1,108 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Florian Westphal +Date: Fri, 21 Feb 2014 20:46:38 +0100 +Subject: net: add and use skb_gso_transport_seglen() + +From: Florian Westphal + +commit de960aa9ab4decc3304959f69533eef64d05d8e8 upstream. + +This moves part of Eric Dumazets skb_gso_seglen helper from tbf sched to +skbuff core so it may be reused by upcoming ip forwarding path patch. + +Signed-off-by: Florian Westphal +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 1 + + net/core/skbuff.c | 25 +++++++++++++++++++++++++ + net/sched/sch_tbf.c | 13 +++---------- + 3 files changed, 29 insertions(+), 10 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2371,6 +2371,7 @@ void skb_copy_and_csum_dev(const struct + void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); + int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); + void skb_scrub_packet(struct sk_buff *skb, bool xnet); ++unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); + struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); + + struct skb_checksum_ops { +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -47,6 +47,8 @@ + #include + #include + #include ++#include ++#include + #include + #ifdef CONFIG_NET_CLS_ACT + #include +@@ -3562,3 +3564,26 @@ void skb_scrub_packet(struct sk_buff *sk + nf_reset_trace(skb); + } + EXPORT_SYMBOL_GPL(skb_scrub_packet); ++ ++/** ++ * skb_gso_transport_seglen - Return length of individual segments of a gso packet ++ * ++ * @skb: GSO skb ++ * ++ * skb_gso_transport_seglen is used to determine the real size of the ++ * individual segments, including Layer4 headers (TCP/UDP). ++ * ++ * The MAC/L2 or network (IP, IPv6) headers are not accounted for. ++ */ ++unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) ++{ ++ const struct skb_shared_info *shinfo = skb_shinfo(skb); ++ unsigned int hdr_len; ++ ++ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) ++ hdr_len = tcp_hdrlen(skb); ++ else ++ hdr_len = sizeof(struct udphdr); ++ return hdr_len + shinfo->gso_size; ++} ++EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); +--- a/net/sched/sch_tbf.c ++++ b/net/sched/sch_tbf.c +@@ -21,7 +21,6 @@ + #include + #include + #include +-#include + + + /* Simple Token Bucket Filter. 
+@@ -148,16 +147,10 @@ static u64 psched_ns_t2l(const struct ps + * Return length of individual segments of a gso packet, + * including all headers (MAC, IP, TCP/UDP) + */ +-static unsigned int skb_gso_seglen(const struct sk_buff *skb) ++static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) + { + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); +- const struct skb_shared_info *shinfo = skb_shinfo(skb); +- +- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) +- hdr_len += tcp_hdrlen(skb); +- else +- hdr_len += sizeof(struct udphdr); +- return hdr_len + shinfo->gso_size; ++ return hdr_len + skb_gso_transport_seglen(skb); + } + + /* GSO packet is too big, segment it so that tbf can transmit +@@ -202,7 +195,7 @@ static int tbf_enqueue(struct sk_buff *s + int ret; + + if (qdisc_pkt_len(skb) > q->max_size) { +- if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) ++ if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size) + return tbf_segment(skb, sch); + return qdisc_reshape_fail(skb, sch); + } diff --git a/queue-3.13/net-asix-add-missing-flag-to-struct-driver_info.patch b/queue-3.13/net-asix-add-missing-flag-to-struct-driver_info.patch new file mode 100644 index 00000000000..6975b69fa28 --- /dev/null +++ b/queue-3.13/net-asix-add-missing-flag-to-struct-driver_info.patch @@ -0,0 +1,39 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Emil Goode +Date: Thu, 13 Feb 2014 19:30:39 +0100 +Subject: net: asix: add missing flag to struct driver_info +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Emil Goode + +[ Upstream commit d43ff4cd798911736fb39025ec8004284b1b0bc2 ] + +The struct driver_info ax88178_info is assigned the function +asix_rx_fixup_common as it's rx_fixup callback. This means that +FLAG_MULTI_PACKET must be set as this function is cloning the +data and calling usbnet_skb_return. Not setting this flag leads +to usbnet_skb_return beeing called a second time from within +the rx_process function in the usbnet module. + +Signed-off-by: Emil Goode +Reported-by: Bjørn Mork +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/asix_devices.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/usb/asix_devices.c ++++ b/drivers/net/usb/asix_devices.c +@@ -918,7 +918,8 @@ static const struct driver_info ax88178_ + .status = asix_status, + .link_reset = ax88178_link_reset, + .reset = ax88178_reset, +- .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR, ++ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | ++ FLAG_MULTI_PACKET, + .rx_fixup = asix_rx_fixup_common, + .tx_fixup = asix_tx_fixup, + }; diff --git a/queue-3.13/net-core-introduce-netif_skb_dev_features.patch b/queue-3.13/net-core-introduce-netif_skb_dev_features.patch new file mode 100644 index 00000000000..95481f87166 --- /dev/null +++ b/queue-3.13/net-core-introduce-netif_skb_dev_features.patch @@ -0,0 +1,103 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Florian Westphal +Date: Fri, 21 Feb 2014 20:46:39 +0100 +Subject: net: core: introduce netif_skb_dev_features + +From: Florian Westphal + +commit d206940319c41df4299db75ed56142177bb2e5f6 upstream. + +Will be used by upcoming ipv4 forward path change that needs to +determine feature mask using skb->dst->dev instead of skb->dev. + +Signed-off-by: Florian Westphal +Signed-off-by: David S. 
Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/netdevice.h | 7 ++++++-
+ net/core/dev.c | 22 ++++++++++++----------
+ 2 files changed, 18 insertions(+), 11 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2984,7 +2984,12 @@ void netdev_change_features(struct net_d
+ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+ struct net_device *dev);
+
+-netdev_features_t netif_skb_features(struct sk_buff *skb);
++netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
++ const struct net_device *dev);
++static inline netdev_features_t netif_skb_features(struct sk_buff *skb)
++{
++ return netif_skb_dev_features(skb, skb->dev);
++}
+
+ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
+ {
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2404,7 +2404,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
+ * 2. No high memory really exists on this machine.
+ */
+
+-static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
++static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
+ {
+ #ifdef CONFIG_HIGHMEM
+ int i;
+@@ -2484,34 +2484,36 @@ static int dev_gso_segment(struct sk_buf
+ }
+
+ static netdev_features_t harmonize_features(struct sk_buff *skb,
+- netdev_features_t features)
++ const struct net_device *dev,
++ netdev_features_t features)
+ {
+ if (skb->ip_summed != CHECKSUM_NONE &&
+ !can_checksum_protocol(features, skb_network_protocol(skb))) {
+ features &= ~NETIF_F_ALL_CSUM;
+- } else if (illegal_highdma(skb->dev, skb)) {
++ } else if (illegal_highdma(dev, skb)) {
+ features &= ~NETIF_F_SG;
+ }
+
+ return features;
+ }
+
+-netdev_features_t netif_skb_features(struct sk_buff *skb)
++netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
++ const struct net_device *dev)
+ {
+ __be16 protocol = skb->protocol;
+- netdev_features_t features = skb->dev->features;
++ netdev_features_t features = dev->features;
+
+- if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
++ if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
+ features &= ~NETIF_F_GSO_MASK;
+
+ if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else if (!vlan_tx_tag_present(skb)) {
+- return harmonize_features(skb, features);
++ return harmonize_features(skb, dev, features);
+ }
+
+- features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
++ features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
+
+ if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
+@@ -2519,9 +2521,9 @@ netdev_features_t netif_skb_features(str
+ NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX;
+
+- return harmonize_features(skb, features);
++ return harmonize_features(skb, dev, features);
+ }
+-EXPORT_SYMBOL(netif_skb_features);
++EXPORT_SYMBOL(netif_skb_dev_features);
+
+ /*
+ * Returns true if either:
diff --git a/queue-3.13/net-fix-ip-rule-iif-oif-device-rename.patch b/queue-3.13/net-fix-ip-rule-iif-oif-device-rename.patch
new file mode 100644
index 00000000000..9b4ae200a53
--- /dev/null
+++ b/queue-3.13/net-fix-ip-rule-iif-oif-device-rename.patch
@@ -0,0 +1,45 @@
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Maciej Żenczykowski
+Date: Fri, 7 Feb 2014 16:23:48 -0800
+Subject: net: fix 'ip rule' iif/oif device rename
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej Żenczykowski
+
+[ Upstream commit 946c032e5a53992ea45e062ecb08670ba39b99e3 ]
+
+ip rules with iif/oif references do not update:
+(detach/attach) across interface renames.
+
+Signed-off-by: Maciej Żenczykowski
+CC: Willem de Bruijn
+CC: Eric Dumazet
+CC: Chris Davis
+CC: Carlo Contavalli
+
+Google-Bug-Id: 12936021
+Acked-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/core/fib_rules.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/net/core/fib_rules.c
++++ b/net/core/fib_rules.c
+@@ -745,6 +745,13 @@ static int fib_rules_event(struct notifi
+ attach_rules(&ops->rules_list, dev);
+ break;
+
++ case NETDEV_CHANGENAME:
++ list_for_each_entry(ops, &net->rules_ops, list) {
++ detach_rules(&ops->rules_list, dev);
++ attach_rules(&ops->rules_list, dev);
++ }
++ break;
++
+ case NETDEV_UNREGISTER:
+ list_for_each_entry(ops, &net->rules_ops, list)
+ detach_rules(&ops->rules_list, dev);
diff --git a/queue-3.13/net-ip-ipv6-handle-gso-skbs-in-forwarding-path.patch b/queue-3.13/net-ip-ipv6-handle-gso-skbs-in-forwarding-path.patch
new file mode 100644
index 00000000000..178a5d7f0ef
--- /dev/null
+++ b/queue-3.13/net-ip-ipv6-handle-gso-skbs-in-forwarding-path.patch
@@ -0,0 +1,219 @@
+From foo@baz Thu Feb 27 20:11:26 PST 2014
+From: Florian Westphal
+Date: Fri, 21 Feb 2014 20:46:40 +0100
+Subject: net: ip, ipv6: handle gso skbs in forwarding path
+
+From: Florian Westphal
+
+commit fe6cc55f3a9a053482a76f5a6b2257cee51b4663 upstream.
+
+Marcelo Ricardo Leitner reported problems when the forwarding link path
+has a lower mtu than the incoming one if the inbound interface supports GRO.
+
+Given:
+Host <mtu1500> R1 <mtu1200> R2
+
+Host sends tcp stream which is routed via R1 and R2. R1 performs GRO.
+
+In this case, the kernel will fail to send ICMP fragmentation needed
+messages (or pkt too big for ipv6), as GSO packets currently bypass dstmtu
+checks in forward path. Instead, Linux tries to send out packets exceeding
+the mtu.
+
+When locking route MTU on Host (i.e., no ipv4 DF bit set), R1 does
+not fragment the packets when forwarding, and again tries to send out
+packets exceeding R1-R2 link mtu.
+
+This alters the forwarding dstmtu checks to take the individual gso
+segment lengths into account.
+
+For ipv6, we send out pkt too big error for gso if the individual
+segments are too big.
+
+For ipv4, we either send icmp fragmentation needed, or, if the DF bit
+is not set, perform software segmentation and let the output path
+create fragments when the packet is leaving the machine.
+It is not 100% correct as the error message will contain the headers of
+the GRO skb instead of the original/segmented one, but it seems to
+work fine in my (limited) tests.
+
+Eric Dumazet suggested to simply shrink mss via ->gso_size to avoid
+software segmentation.
+
+However it turns out that skb_segment() assumes skb nr_frags is related
+to mss size so we would BUG there. I don't want to mess with it considering
+Herbert and Eric disagree on what the correct behavior should be.
+
+Hannes Frederic Sowa notes that when we would shrink gso_size
+skb_segment would then also need to deal with the case where
+SKB_MAX_FRAGS would be exceeded.
+
+This uses software segmentation in the forward path when we hit ipv4
+non-DF packets and the outgoing link mtu is too small. It's not perfect,
+but given the lack of bug reports wrt. GRO fwd being broken this is a
+rare case anyway. Also it's not like this could not be improved later
+once the dust settles.
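+
+For illustration, the new ipv4 policy condenses to this sketch (the
+helpers are added by the hunks below; skb_gso_network_seglen() returns
+one segment's L3 plus L4 header length plus gso_size):
+
+	if (skb->len <= mtu || skb->local_df)
+		return false;	/* fits, or we may fragment locally */
+	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+		return false;	/* every individual segment fits the mtu */
+	return true;		/* send icmp error, or software-segment */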
+ +Acked-by: Herbert Xu +Reported-by: Marcelo Ricardo Leitner +Signed-off-by: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 17 +++++++++++ + net/ipv4/ip_forward.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++-- + net/ipv6/ip6_output.c | 17 ++++++++++- + 3 files changed, 101 insertions(+), 4 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2811,5 +2811,22 @@ static inline bool skb_head_is_locked(co + { + return !skb->head_frag || skb_cloned(skb); + } ++ ++/** ++ * skb_gso_network_seglen - Return length of individual segments of a gso packet ++ * ++ * @skb: GSO skb ++ * ++ * skb_gso_network_seglen is used to determine the real size of the ++ * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). ++ * ++ * The MAC/L2 header is not accounted for. ++ */ ++static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) ++{ ++ unsigned int hdr_len = skb_transport_header(skb) - ++ skb_network_header(skb); ++ return hdr_len + skb_gso_transport_seglen(skb); ++} + #endif /* __KERNEL__ */ + #endif /* _LINUX_SKBUFF_H */ +--- a/net/ipv4/ip_forward.c ++++ b/net/ipv4/ip_forward.c +@@ -39,6 +39,71 @@ + #include + #include + ++static bool ip_may_fragment(const struct sk_buff *skb) ++{ ++ return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || ++ !skb->local_df; ++} ++ ++static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) ++{ ++ if (skb->len <= mtu || skb->local_df) ++ return false; ++ ++ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) ++ return false; ++ ++ return true; ++} ++ ++static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb) ++{ ++ unsigned int mtu; ++ ++ if (skb->local_df || !skb_is_gso(skb)) ++ return false; ++ ++ mtu = dst_mtu(skb_dst(skb)); ++ ++ /* if seglen > mtu, do software segmentation for IP fragmentation on ++ * output. DF bit cannot be set since ip_forward would have sent ++ * icmp error. 
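++ * (thus any skb that reaches this point may legally be
++ * fragmented, so segmenting it here is always safe)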
++ */ ++ return skb_gso_network_seglen(skb) > mtu; ++} ++ ++/* called if GSO skb needs to be fragmented on forward */ ++static int ip_forward_finish_gso(struct sk_buff *skb) ++{ ++ struct dst_entry *dst = skb_dst(skb); ++ netdev_features_t features; ++ struct sk_buff *segs; ++ int ret = 0; ++ ++ features = netif_skb_dev_features(skb, dst->dev); ++ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); ++ if (IS_ERR(segs)) { ++ kfree_skb(skb); ++ return -ENOMEM; ++ } ++ ++ consume_skb(skb); ++ ++ do { ++ struct sk_buff *nskb = segs->next; ++ int err; ++ ++ segs->next = NULL; ++ err = dst_output(segs); ++ ++ if (err && ret == 0) ++ ret = err; ++ segs = nskb; ++ } while (segs); ++ ++ return ret; ++} ++ + static int ip_forward_finish(struct sk_buff *skb) + { + struct ip_options *opt = &(IPCB(skb)->opt); +@@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_b + if (unlikely(opt->optlen)) + ip_forward_options(skb); + ++ if (ip_gso_exceeds_dst_mtu(skb)) ++ return ip_forward_finish_gso(skb); ++ + return dst_output(skb); + } + +@@ -88,8 +156,7 @@ int ip_forward(struct sk_buff *skb) + if (opt->is_strictroute && rt->rt_uses_gateway) + goto sr_failed; + +- if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && +- (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { ++ if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, dst_mtu(&rt->dst))) { + IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(dst_mtu(&rt->dst))); +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -321,6 +321,20 @@ static inline int ip6_forward_finish(str + return dst_output(skb); + } + ++static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) ++{ ++ if (skb->len <= mtu || skb->local_df) ++ return false; ++ ++ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) ++ return true; ++ ++ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) ++ return false; ++ ++ return true; ++} ++ + int ip6_forward(struct sk_buff *skb) + { + struct dst_entry *dst = skb_dst(skb); +@@ -443,8 +457,7 @@ int ip6_forward(struct sk_buff *skb) + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + +- if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || +- (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { ++ if (ip6_pkt_too_big(skb, mtu)) { + /* Again, force OUTPUT device used as source address */ + skb->dev = dst->dev; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/queue-3.13/net-mvneta-add-missing-bit-descriptions-for-interrupt-masks-and-causes.patch b/queue-3.13/net-mvneta-add-missing-bit-descriptions-for-interrupt-masks-and-causes.patch new file mode 100644 index 00000000000..780ac2ab625 --- /dev/null +++ b/queue-3.13/net-mvneta-add-missing-bit-descriptions-for-interrupt-masks-and-causes.patch @@ -0,0 +1,90 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: willy tarreau +Date: Thu, 16 Jan 2014 08:20:10 +0100 +Subject: net: mvneta: add missing bit descriptions for interrupt masks and causes + +From: willy tarreau + +[ Upstream commit 40ba35e74fa56866918d2f3bc0528b5b92725d5e ] + +Marvell has not published the chip's datasheet yet, so it's very hard +to find the relevant bits to manipulate to change the IRQ behaviour. +Fortunately, these bits are described in the proprietary LSP patch set +which is publicly available here : + + http://www.plugcomputer.org/downloads/mirabox/ + +So let's put them back in the driver in order to reduce the burden of +current and future maintenance. 
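+
+As a purely illustrative sketch (handle_link_event() is a made-up
+placeholder, not part of this patch), the named bits let code decode
+the cause registers without magic numbers:
+
+	u32 cause = mvreg_read(pp, MVNETA_INTR_MISC_CAUSE);
+
+	if (cause & MVNETA_CAUSE_LINK_CHANGE)
+		handle_link_event(pp);		/* hypothetical handler */
+	if (cause & MVNETA_CAUSE_TXQ_ERROR_MASK(0))
+		netdev_err(pp->dev, "error on TXQ 0\n");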
+ +Cc: Thomas Petazzoni +Cc: Gregory CLEMENT +Tested-by: Arnaud Ebalard +Signed-off-by: Willy Tarreau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 44 ++++++++++++++++++++++++++++++++-- + 1 file changed, 42 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -101,16 +101,56 @@ + #define MVNETA_CPU_RXQ_ACCESS_ALL_MASK 0x000000ff + #define MVNETA_CPU_TXQ_ACCESS_ALL_MASK 0x0000ff00 + #define MVNETA_RXQ_TIME_COAL_REG(q) (0x2580 + ((q) << 2)) ++ ++/* Exception Interrupt Port/Queue Cause register */ ++ + #define MVNETA_INTR_NEW_CAUSE 0x25a0 +-#define MVNETA_RX_INTR_MASK(nr_rxqs) (((1 << nr_rxqs) - 1) << 8) + #define MVNETA_INTR_NEW_MASK 0x25a4 ++ ++/* bits 0..7 = TXQ SENT, one bit per queue. ++ * bits 8..15 = RXQ OCCUP, one bit per queue. ++ * bits 16..23 = RXQ FREE, one bit per queue. ++ * bit 29 = OLD_REG_SUM, see old reg ? ++ * bit 30 = TX_ERR_SUM, one bit for 4 ports ++ * bit 31 = MISC_SUM, one bit for 4 ports ++ */ ++#define MVNETA_TX_INTR_MASK(nr_txqs) (((1 << nr_txqs) - 1) << 0) ++#define MVNETA_TX_INTR_MASK_ALL (0xff << 0) ++#define MVNETA_RX_INTR_MASK(nr_rxqs) (((1 << nr_rxqs) - 1) << 8) ++#define MVNETA_RX_INTR_MASK_ALL (0xff << 8) ++ + #define MVNETA_INTR_OLD_CAUSE 0x25a8 + #define MVNETA_INTR_OLD_MASK 0x25ac ++ ++/* Data Path Port/Queue Cause Register */ + #define MVNETA_INTR_MISC_CAUSE 0x25b0 + #define MVNETA_INTR_MISC_MASK 0x25b4 ++ ++#define MVNETA_CAUSE_PHY_STATUS_CHANGE BIT(0) ++#define MVNETA_CAUSE_LINK_CHANGE BIT(1) ++#define MVNETA_CAUSE_PTP BIT(4) ++ ++#define MVNETA_CAUSE_INTERNAL_ADDR_ERR BIT(7) ++#define MVNETA_CAUSE_RX_OVERRUN BIT(8) ++#define MVNETA_CAUSE_RX_CRC_ERROR BIT(9) ++#define MVNETA_CAUSE_RX_LARGE_PKT BIT(10) ++#define MVNETA_CAUSE_TX_UNDERUN BIT(11) ++#define MVNETA_CAUSE_PRBS_ERR BIT(12) ++#define MVNETA_CAUSE_PSC_SYNC_CHANGE BIT(13) ++#define MVNETA_CAUSE_SERDES_SYNC_ERR BIT(14) ++ ++#define MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT 16 ++#define MVNETA_CAUSE_BMU_ALLOC_ERR_ALL_MASK (0xF << MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT) ++#define MVNETA_CAUSE_BMU_ALLOC_ERR_MASK(pool) (1 << (MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT + (pool))) ++ ++#define MVNETA_CAUSE_TXQ_ERROR_SHIFT 24 ++#define MVNETA_CAUSE_TXQ_ERROR_ALL_MASK (0xFF << MVNETA_CAUSE_TXQ_ERROR_SHIFT) ++#define MVNETA_CAUSE_TXQ_ERROR_MASK(q) (1 << (MVNETA_CAUSE_TXQ_ERROR_SHIFT + (q))) ++ + #define MVNETA_INTR_ENABLE 0x25b8 + #define MVNETA_TXQ_INTR_ENABLE_ALL_MASK 0x0000ff00 +-#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000 ++#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000 // note: neta says it's 0x000000FF ++ + #define MVNETA_RXQ_CMD 0x2680 + #define MVNETA_RXQ_DISABLE_SHIFT 8 + #define MVNETA_RXQ_ENABLE_MASK 0x000000ff diff --git a/queue-3.13/net-mvneta-do-not-schedule-in-mvneta_tx_timeout.patch b/queue-3.13/net-mvneta-do-not-schedule-in-mvneta_tx_timeout.patch new file mode 100644 index 00000000000..7ecb70de6b9 --- /dev/null +++ b/queue-3.13/net-mvneta-do-not-schedule-in-mvneta_tx_timeout.patch @@ -0,0 +1,80 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: willy tarreau +Date: Thu, 16 Jan 2014 08:20:09 +0100 +Subject: net: mvneta: do not schedule in mvneta_tx_timeout + +From: willy tarreau + +[ Upstream commit 290213667ab53a95456397763205e4b1e30f46b5 ] + +If a queue timeout is reported, we can oops because of some +schedules while the caller is atomic, as shown below : + + mvneta d0070000.ethernet eth0: tx timeout + BUG: scheduling while atomic: bash/1528/0x00000100 + 
Modules linked in: slhttp_ethdiv(C) [last unloaded: slhttp_ethdiv] + CPU: 2 PID: 1528 Comm: bash Tainted: G WC 3.13.0-rc4-mvebu-nf #180 + [] (unwind_backtrace+0x1/0x98) from [] (show_stack+0xb/0xc) + [] (show_stack+0xb/0xc) from [] (dump_stack+0x4f/0x64) + [] (dump_stack+0x4f/0x64) from [] (__schedule_bug+0x37/0x4c) + [] (__schedule_bug+0x37/0x4c) from [] (__schedule+0x325/0x3ec) + [] (__schedule+0x325/0x3ec) from [] (schedule_timeout+0xb7/0x118) + [] (schedule_timeout+0xb7/0x118) from [] (msleep+0xf/0x14) + [] (msleep+0xf/0x14) from [] (mvneta_stop_dev+0x21/0x194) + [] (mvneta_stop_dev+0x21/0x194) from [] (mvneta_tx_timeout+0x19/0x24) + [] (mvneta_tx_timeout+0x19/0x24) from [] (dev_watchdog+0x18b/0x1c4) + [] (dev_watchdog+0x18b/0x1c4) from [] (call_timer_fn.isra.27+0x17/0x5c) + [] (call_timer_fn.isra.27+0x17/0x5c) from [] (run_timer_softirq+0x115/0x170) + [] (run_timer_softirq+0x115/0x170) from [] (__do_softirq+0xbd/0x1a8) + [] (__do_softirq+0xbd/0x1a8) from [] (irq_exit+0x61/0x98) + [] (irq_exit+0x61/0x98) from [] (handle_IRQ+0x27/0x60) + [] (handle_IRQ+0x27/0x60) from [] (armada_370_xp_handle_irq+0x33/0xc8) + [] (armada_370_xp_handle_irq+0x33/0xc8) from [] (__irq_usr+0x49/0x60) + +Ben Hutchings attempted to propose a better fix consisting in using a +scheduled work for this, but while it fixed this panic, it caused other +random freezes and panics proving that the reset sequence in the driver +is unreliable and that additional fixes should be investigated. + +When sending multiple streams over a link limited to 100 Mbps, Tx timeouts +happen from time to time, and the driver correctly recovers only when the +function is disabled. + +Cc: Thomas Petazzoni +Cc: Gregory CLEMENT +Cc: Ben Hutchings +Tested-by: Arnaud Ebalard +Signed-off-by: Willy Tarreau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 11 ----------- + 1 file changed, 11 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -2244,16 +2244,6 @@ static void mvneta_stop_dev(struct mvnet + mvneta_rx_reset(pp); + } + +-/* tx timeout callback - display a message and stop/start the network device */ +-static void mvneta_tx_timeout(struct net_device *dev) +-{ +- struct mvneta_port *pp = netdev_priv(dev); +- +- netdev_info(dev, "tx timeout\n"); +- mvneta_stop_dev(pp); +- mvneta_start_dev(pp); +-} +- + /* Return positive if MTU is valid */ + static int mvneta_check_mtu_valid(struct net_device *dev, int mtu) + { +@@ -2634,7 +2624,6 @@ static const struct net_device_ops mvnet + .ndo_set_rx_mode = mvneta_set_rx_mode, + .ndo_set_mac_address = mvneta_set_mac_addr, + .ndo_change_mtu = mvneta_change_mtu, +- .ndo_tx_timeout = mvneta_tx_timeout, + .ndo_get_stats64 = mvneta_get_stats64, + .ndo_do_ioctl = mvneta_ioctl, + }; diff --git a/queue-3.13/net-mvneta-increase-the-64-bit-rx-tx-stats-out-of-the-hot-path.patch b/queue-3.13/net-mvneta-increase-the-64-bit-rx-tx-stats-out-of-the-hot-path.patch new file mode 100644 index 00000000000..702f1fbda1d --- /dev/null +++ b/queue-3.13/net-mvneta-increase-the-64-bit-rx-tx-stats-out-of-the-hot-path.patch @@ -0,0 +1,63 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: willy tarreau +Date: Thu, 16 Jan 2014 08:20:07 +0100 +Subject: net: mvneta: increase the 64-bit rx/tx stats out of the hot path + +From: willy tarreau + +[ Upstream commit dc4277dd41a80fd5f29a90412ea04bc3ba54fbf1 ] + +Better count packets and bytes in the stack and on 32 bit then +accumulate them at the end for once. 
This saves two memory writes +and two memory barriers per packet. The incoming packet rate was +increased by 4.7% on the Openblocks AX3 thanks to this. + +Cc: Thomas Petazzoni +Cc: Gregory CLEMENT +Reviewed-by: Eric Dumazet +Tested-by: Arnaud Ebalard +Signed-off-by: Willy Tarreau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -1391,6 +1391,8 @@ static int mvneta_rx(struct mvneta_port + { + struct net_device *dev = pp->dev; + int rx_done, rx_filled; ++ u32 rcvd_pkts = 0; ++ u32 rcvd_bytes = 0; + + /* Get number of received packets */ + rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq); +@@ -1428,10 +1430,8 @@ static int mvneta_rx(struct mvneta_port + + rx_bytes = rx_desc->data_size - + (ETH_FCS_LEN + MVNETA_MH_SIZE); +- u64_stats_update_begin(&pp->rx_stats.syncp); +- pp->rx_stats.packets++; +- pp->rx_stats.bytes += rx_bytes; +- u64_stats_update_end(&pp->rx_stats.syncp); ++ rcvd_pkts++; ++ rcvd_bytes += rx_bytes; + + /* Linux processing */ + skb_reserve(skb, MVNETA_MH_SIZE); +@@ -1452,6 +1452,13 @@ static int mvneta_rx(struct mvneta_port + } + } + ++ if (rcvd_pkts) { ++ u64_stats_update_begin(&pp->rx_stats.syncp); ++ pp->rx_stats.packets += rcvd_pkts; ++ pp->rx_stats.bytes += rcvd_bytes; ++ u64_stats_update_end(&pp->rx_stats.syncp); ++ } ++ + /* Update rxq management counters */ + mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_filled); + diff --git a/queue-3.13/net-mvneta-replace-tx-timer-with-a-real-interrupt.patch b/queue-3.13/net-mvneta-replace-tx-timer-with-a-real-interrupt.patch new file mode 100644 index 00000000000..e1788565295 --- /dev/null +++ b/queue-3.13/net-mvneta-replace-tx-timer-with-a-real-interrupt.patch @@ -0,0 +1,206 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: willy tarreau +Date: Thu, 16 Jan 2014 08:20:11 +0100 +Subject: net: mvneta: replace Tx timer with a real interrupt + +From: willy tarreau + +[ Upstream commit 71f6d1b31fb1f278a345a30a2180515adc7d80ae ] + +Right now the mvneta driver doesn't handle Tx IRQ, and relies on two +mechanisms to flush Tx descriptors : a flush at the end of mvneta_tx() +and a timer. If a burst of packets is emitted faster than the device +can send them, then the queue is stopped until next wake-up of the +timer 10ms later. This causes jerky output traffic with bursts and +pauses, making it difficult to reach line rate with very few streams. + +A test on UDP traffic shows that it's not possible to go beyond 134 +Mbps / 12 kpps of outgoing traffic with 1500-bytes IP packets. Routed +traffic tends to observe pauses as well if the traffic is bursty, +making it even burstier after the wake-up. + +It seems that this feature was inherited from the original driver but +nothing there mentions any reason for not using the interrupt instead, +which the chip supports. + +Thus, this patch enables Tx interrupts and removes the timer. It does +the two at once because it's not really possible to make the two +mechanisms coexist, so a split patch doesn't make sense. + +First tests performed on a Mirabox (Armada 370) show that less CPU +seems to be used when sending traffic. One reason might be that we now +call the mvneta_tx_done_gbe() with a mask indicating which queues have +been done instead of looping over all of them. + +The same UDP test above now happily reaches 987 Mbps / 87.7 kpps. 
+Single-stream TCP traffic can now more easily reach line rate. HTTP +transfers of 1 MB objects over a single connection went from 730 to +840 Mbps. It is even possible to go significantly higher (>900 Mbps) +by tweaking tcp_tso_win_divisor. + +Cc: Thomas Petazzoni +Cc: Gregory CLEMENT +Cc: Arnaud Ebalard +Cc: Eric Dumazet +Tested-by: Arnaud Ebalard +Signed-off-by: Willy Tarreau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 71 +++++----------------------------- + 1 file changed, 12 insertions(+), 59 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -216,9 +216,6 @@ + #define MVNETA_RX_COAL_PKTS 32 + #define MVNETA_RX_COAL_USEC 100 + +-/* Timer */ +-#define MVNETA_TX_DONE_TIMER_PERIOD 10 +- + /* Napi polling weight */ + #define MVNETA_RX_POLL_WEIGHT 64 + +@@ -274,16 +271,11 @@ struct mvneta_port { + void __iomem *base; + struct mvneta_rx_queue *rxqs; + struct mvneta_tx_queue *txqs; +- struct timer_list tx_done_timer; + struct net_device *dev; + + u32 cause_rx_tx; + struct napi_struct napi; + +- /* Flags */ +- unsigned long flags; +-#define MVNETA_F_TX_DONE_TIMER_BIT 0 +- + /* Napi weight */ + int weight; + +@@ -1149,17 +1141,6 @@ static void mvneta_tx_done_pkts_coal_set + txq->done_pkts_coal = value; + } + +-/* Trigger tx done timer in MVNETA_TX_DONE_TIMER_PERIOD msecs */ +-static void mvneta_add_tx_done_timer(struct mvneta_port *pp) +-{ +- if (test_and_set_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags) == 0) { +- pp->tx_done_timer.expires = jiffies + +- msecs_to_jiffies(MVNETA_TX_DONE_TIMER_PERIOD); +- add_timer(&pp->tx_done_timer); +- } +-} +- +- + /* Handle rx descriptor fill by setting buf_cookie and buf_phys_addr */ + static void mvneta_rx_desc_fill(struct mvneta_rx_desc *rx_desc, + u32 phys_addr, u32 cookie) +@@ -1651,15 +1632,6 @@ out: + dev_kfree_skb_any(skb); + } + +- if (txq->count >= MVNETA_TXDONE_COAL_PKTS) +- mvneta_txq_done(pp, txq); +- +- /* If after calling mvneta_txq_done, count equals +- * frags, we need to set the timer +- */ +- if (txq->count == frags && frags > 0) +- mvneta_add_tx_done_timer(pp); +- + return NETDEV_TX_OK; + } + +@@ -1935,14 +1907,22 @@ static int mvneta_poll(struct napi_struc + + /* Read cause register */ + cause_rx_tx = mvreg_read(pp, MVNETA_INTR_NEW_CAUSE) & +- MVNETA_RX_INTR_MASK(rxq_number); ++ (MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number)); ++ ++ /* Release Tx descriptors */ ++ if (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL) { ++ int tx_todo = 0; ++ ++ mvneta_tx_done_gbe(pp, (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL), &tx_todo); ++ cause_rx_tx &= ~MVNETA_TX_INTR_MASK_ALL; ++ } + + /* For the case where the last mvneta_poll did not process all + * RX packets + */ + cause_rx_tx |= pp->cause_rx_tx; + if (rxq_number > 1) { +- while ((cause_rx_tx != 0) && (budget > 0)) { ++ while ((cause_rx_tx & MVNETA_RX_INTR_MASK_ALL) && (budget > 0)) { + int count; + struct mvneta_rx_queue *rxq; + /* get rx queue number from cause_rx_tx */ +@@ -1974,7 +1954,7 @@ static int mvneta_poll(struct napi_struc + napi_complete(napi); + local_irq_save(flags); + mvreg_write(pp, MVNETA_INTR_NEW_MASK, +- MVNETA_RX_INTR_MASK(rxq_number)); ++ MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number)); + local_irq_restore(flags); + } + +@@ -1982,26 +1962,6 @@ static int mvneta_poll(struct napi_struc + return rx_done; + } + +-/* tx done timer callback */ +-static void mvneta_tx_done_timer_callback(unsigned long data) +-{ +- struct net_device 
*dev = (struct net_device *)data; +- struct mvneta_port *pp = netdev_priv(dev); +- int tx_done = 0, tx_todo = 0; +- +- if (!netif_running(dev)) +- return ; +- +- clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags); +- +- tx_done = mvneta_tx_done_gbe(pp, +- (((1 << txq_number) - 1) & +- MVNETA_CAUSE_TXQ_SENT_DESC_ALL_MASK), +- &tx_todo); +- if (tx_todo > 0) +- mvneta_add_tx_done_timer(pp); +-} +- + /* Handle rxq fill: allocates rxq skbs; called when initializing a port */ + static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, + int num) +@@ -2251,7 +2211,7 @@ static void mvneta_start_dev(struct mvne + + /* Unmask interrupts */ + mvreg_write(pp, MVNETA_INTR_NEW_MASK, +- MVNETA_RX_INTR_MASK(rxq_number)); ++ MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number)); + + phy_start(pp->phy_dev); + netif_tx_start_all_queues(pp->dev); +@@ -2527,8 +2487,6 @@ static int mvneta_stop(struct net_device + free_irq(dev->irq, pp); + mvneta_cleanup_rxqs(pp); + mvneta_cleanup_txqs(pp); +- del_timer(&pp->tx_done_timer); +- clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags); + + return 0; + } +@@ -2887,11 +2845,6 @@ static int mvneta_probe(struct platform_ + } + } + +- pp->tx_done_timer.data = (unsigned long)dev; +- pp->tx_done_timer.function = mvneta_tx_done_timer_callback; +- init_timer(&pp->tx_done_timer); +- clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags); +- + pp->tx_ring_size = MVNETA_MAX_TXD; + pp->rx_ring_size = MVNETA_MAX_RXD; + diff --git a/queue-3.13/net-mvneta-use-per_cpu-stats-to-fix-an-smp-lock-up.patch b/queue-3.13/net-mvneta-use-per_cpu-stats-to-fix-an-smp-lock-up.patch new file mode 100644 index 00000000000..4f3f60f8de2 --- /dev/null +++ b/queue-3.13/net-mvneta-use-per_cpu-stats-to-fix-an-smp-lock-up.patch @@ -0,0 +1,217 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: willy tarreau +Date: Thu, 16 Jan 2014 08:20:08 +0100 +Subject: net: mvneta: use per_cpu stats to fix an SMP lock up + +From: willy tarreau + +[ Upstream commit 74c41b048db1073a04827d7f39e95ac1935524cc ] + +Stats writers are mvneta_rx() and mvneta_tx(). They don't lock anything +when they update the stats, and as a result, it randomly happens that +the stats freeze on SMP if two updates happen during stats retrieval. +This is very easily reproducible by starting two HTTP servers and binding +each of them to a different CPU, then consulting /proc/net/dev in loops +during transfers, the interface should immediately lock up. This issue +also randomly happens upon link state changes during transfers, because +the stats are collected in this situation, but it takes more attempts to +reproduce it. + +The comments in netdevice.h suggest using per_cpu stats instead to get +rid of this issue. + +This patch implements this. It merges both rx_stats and tx_stats into +a single "stats" member with a single syncp. Both mvneta_rx() and +mvneta_rx() now only update the a single CPU's counters. + +In turn, mvneta_get_stats64() does the summing by iterating over all CPUs +to get their respective stats. + +With this change, stats are still correct and no more lockup is encountered. + +Note that this bug was present since the first import of the mvneta +driver. It might make sense to backport it to some stable trees. If +so, it depends on "d33dc73 net: mvneta: increase the 64-bit rx/tx stats +out of the hot path". + +Cc: Thomas Petazzoni +Cc: Gregory CLEMENT +Reviewed-by: Eric Dumazet +Tested-by: Arnaud Ebalard +Signed-off-by: Willy Tarreau +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 88 ++++++++++++++++++++++------------ + 1 file changed, 57 insertions(+), 31 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -221,10 +221,12 @@ + + #define MVNETA_RX_BUF_SIZE(pkt_size) ((pkt_size) + NET_SKB_PAD) + +-struct mvneta_stats { ++struct mvneta_pcpu_stats { + struct u64_stats_sync syncp; +- u64 packets; +- u64 bytes; ++ u64 rx_packets; ++ u64 rx_bytes; ++ u64 tx_packets; ++ u64 tx_bytes; + }; + + struct mvneta_port { +@@ -250,8 +252,7 @@ struct mvneta_port { + u8 mcast_count[256]; + u16 tx_ring_size; + u16 rx_ring_size; +- struct mvneta_stats tx_stats; +- struct mvneta_stats rx_stats; ++ struct mvneta_pcpu_stats *stats; + + struct mii_bus *mii_bus; + struct phy_device *phy_dev; +@@ -461,21 +462,29 @@ struct rtnl_link_stats64 *mvneta_get_sta + { + struct mvneta_port *pp = netdev_priv(dev); + unsigned int start; ++ int cpu; + +- memset(stats, 0, sizeof(struct rtnl_link_stats64)); +- +- do { +- start = u64_stats_fetch_begin_bh(&pp->rx_stats.syncp); +- stats->rx_packets = pp->rx_stats.packets; +- stats->rx_bytes = pp->rx_stats.bytes; +- } while (u64_stats_fetch_retry_bh(&pp->rx_stats.syncp, start)); +- +- +- do { +- start = u64_stats_fetch_begin_bh(&pp->tx_stats.syncp); +- stats->tx_packets = pp->tx_stats.packets; +- stats->tx_bytes = pp->tx_stats.bytes; +- } while (u64_stats_fetch_retry_bh(&pp->tx_stats.syncp, start)); ++ for_each_possible_cpu(cpu) { ++ struct mvneta_pcpu_stats *cpu_stats; ++ u64 rx_packets; ++ u64 rx_bytes; ++ u64 tx_packets; ++ u64 tx_bytes; ++ ++ cpu_stats = per_cpu_ptr(pp->stats, cpu); ++ do { ++ start = u64_stats_fetch_begin_bh(&cpu_stats->syncp); ++ rx_packets = cpu_stats->rx_packets; ++ rx_bytes = cpu_stats->rx_bytes; ++ tx_packets = cpu_stats->tx_packets; ++ tx_bytes = cpu_stats->tx_bytes; ++ } while (u64_stats_fetch_retry_bh(&cpu_stats->syncp, start)); ++ ++ stats->rx_packets += rx_packets; ++ stats->rx_bytes += rx_bytes; ++ stats->tx_packets += tx_packets; ++ stats->tx_bytes += tx_bytes; ++ } + + stats->rx_errors = dev->stats.rx_errors; + stats->rx_dropped = dev->stats.rx_dropped; +@@ -1453,10 +1462,12 @@ static int mvneta_rx(struct mvneta_port + } + + if (rcvd_pkts) { +- u64_stats_update_begin(&pp->rx_stats.syncp); +- pp->rx_stats.packets += rcvd_pkts; +- pp->rx_stats.bytes += rcvd_bytes; +- u64_stats_update_end(&pp->rx_stats.syncp); ++ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); ++ ++ u64_stats_update_begin(&stats->syncp); ++ stats->rx_packets += rcvd_pkts; ++ stats->rx_bytes += rcvd_bytes; ++ u64_stats_update_end(&stats->syncp); + } + + /* Update rxq management counters */ +@@ -1589,11 +1600,12 @@ static int mvneta_tx(struct sk_buff *skb + + out: + if (frags > 0) { +- u64_stats_update_begin(&pp->tx_stats.syncp); +- pp->tx_stats.packets++; +- pp->tx_stats.bytes += skb->len; +- u64_stats_update_end(&pp->tx_stats.syncp); ++ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); + ++ u64_stats_update_begin(&stats->syncp); ++ stats->tx_packets++; ++ stats->tx_bytes += skb->len; ++ u64_stats_update_end(&stats->syncp); + } else { + dev->stats.tx_dropped++; + dev_kfree_skb_any(skb); +@@ -2758,6 +2770,7 @@ static int mvneta_probe(struct platform_ + const char *mac_from; + int phy_mode; + int err; ++ int cpu; + + /* Our multiqueue support is not complete, so for now, only + * allow the usage of the first RX queue +@@ -2799,9 +2812,6 @@ static int mvneta_probe(struct platform_ + + pp = 
netdev_priv(dev); + +- u64_stats_init(&pp->tx_stats.syncp); +- u64_stats_init(&pp->rx_stats.syncp); +- + pp->weight = MVNETA_RX_POLL_WEIGHT; + pp->phy_node = phy_node; + pp->phy_interface = phy_mode; +@@ -2820,6 +2830,19 @@ static int mvneta_probe(struct platform_ + goto err_clk; + } + ++ /* Alloc per-cpu stats */ ++ pp->stats = alloc_percpu(struct mvneta_pcpu_stats); ++ if (!pp->stats) { ++ err = -ENOMEM; ++ goto err_unmap; ++ } ++ ++ for_each_possible_cpu(cpu) { ++ struct mvneta_pcpu_stats *stats; ++ stats = per_cpu_ptr(pp->stats, cpu); ++ u64_stats_init(&stats->syncp); ++ } ++ + dt_mac_addr = of_get_mac_address(dn); + if (dt_mac_addr) { + mac_from = "device tree"; +@@ -2849,7 +2872,7 @@ static int mvneta_probe(struct platform_ + err = mvneta_init(pp, phy_addr); + if (err < 0) { + dev_err(&pdev->dev, "can't init eth hal\n"); +- goto err_unmap; ++ goto err_free_stats; + } + mvneta_port_power_up(pp, phy_mode); + +@@ -2879,6 +2902,8 @@ static int mvneta_probe(struct platform_ + + err_deinit: + mvneta_deinit(pp); ++err_free_stats: ++ free_percpu(pp->stats); + err_unmap: + iounmap(pp->base); + err_clk: +@@ -2899,6 +2924,7 @@ static int mvneta_remove(struct platform + unregister_netdev(dev); + mvneta_deinit(pp); + clk_disable_unprepare(pp->clk); ++ free_percpu(pp->stats); + iounmap(pp->base); + irq_dispose_mapping(dev->irq); + free_netdev(dev); diff --git a/queue-3.13/net-qmi_wwan-add-netgear-aircard-340u.patch b/queue-3.13/net-qmi_wwan-add-netgear-aircard-340u.patch new file mode 100644 index 00000000000..cbff8fea467 --- /dev/null +++ b/queue-3.13/net-qmi_wwan-add-netgear-aircard-340u.patch @@ -0,0 +1,37 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Bjørn Mork +Date: Tue, 4 Feb 2014 13:04:33 +0100 +Subject: net: qmi_wwan: add Netgear Aircard 340U +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Bjørn Mork + +[ Upstream commit fbd3a77d813f211060f86cc7a2f8416caf0e03b1 ] + +This device was mentioned in an OpenWRT forum. Seems to have a "standard" +Sierra Wireless ifnumber to function layout: + 0: qcdm + 2: nmea + 3: modem + 8: qmi + 9: storage + +Signed-off-by: Bjørn Mork +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -723,6 +723,7 @@ static const struct usb_device_id produc + {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ + {QMI_FIXED_INTF(0x1199, 0x68a2, 19)}, /* Sierra Wireless MC7710 in QMI mode */ + {QMI_FIXED_INTF(0x1199, 0x901c, 8)}, /* Sierra Wireless EM7700 */ ++ {QMI_FIXED_INTF(0x1199, 0x9051, 8)}, /* Netgear AirCard 340U */ + {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ + {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ + {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ diff --git a/queue-3.13/net-sctp-fix-sctp_connectx-abi-for-ia32-emulation-compat-mode.patch b/queue-3.13/net-sctp-fix-sctp_connectx-abi-for-ia32-emulation-compat-mode.patch new file mode 100644 index 00000000000..aa7581b889c --- /dev/null +++ b/queue-3.13/net-sctp-fix-sctp_connectx-abi-for-ia32-emulation-compat-mode.patch @@ -0,0 +1,105 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Daniel Borkmann +Date: Mon, 17 Feb 2014 12:11:11 +0100 +Subject: net: sctp: fix sctp_connectx abi for ia32 emulation/compat mode + +From: Daniel Borkmann + +[ Upstream commit ffd5939381c609056b33b7585fb05a77b4c695f3 ] + +SCTP's sctp_connectx() abi breaks for 64bit kernels compiled with 32bit +emulation (e.g. ia32 emulation or x86_x32). Due to internal usage of +'struct sctp_getaddrs_old' which includes a struct sockaddr pointer, +sizeof(param) check will always fail in kernel as the structure in +64bit kernel space is 4bytes larger than for user binaries compiled +in 32bit mode. Thus, applications making use of sctp_connectx() won't +be able to run under such circumstances. + +Introduce a compat interface in the kernel to deal with such +situations by using a 'struct compat_sctp_getaddrs_old' structure +where user data is copied into it, and then sucessively transformed +into a 'struct sctp_getaddrs_old' structure with the help of +compat_ptr(). That fixes sctp_connectx() abi without any changes +needed in user space, and lets the SCTP test suite pass when compiled +in 32bit and run on 64bit kernels. + +Fixes: f9c67811ebc0 ("sctp: Fix regression introduced by new sctp_connectx api") +Signed-off-by: Daniel Borkmann +Acked-by: Neil Horman +Acked-by: Vlad Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 41 ++++++++++++++++++++++++++++++++--------- + 1 file changed, 32 insertions(+), 9 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -65,6 +65,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1369,11 +1370,19 @@ static int sctp_setsockopt_connectx(stru + /* + * New (hopefully final) interface for the API. + * We use the sctp_getaddrs_old structure so that use-space library +- * can avoid any unnecessary allocations. The only defferent part ++ * can avoid any unnecessary allocations. The only different part + * is that we store the actual length of the address buffer into the +- * addrs_num structure member. That way we can re-use the existing ++ * addrs_num structure member. That way we can re-use the existing + * code. 
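+ * (on compat tasks the 32-bit user layout of this structure is
+ * smaller, hence the compat variant defined just below)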
+ */ ++#ifdef CONFIG_COMPAT ++struct compat_sctp_getaddrs_old { ++ sctp_assoc_t assoc_id; ++ s32 addr_num; ++ compat_uptr_t addrs; /* struct sockaddr * */ ++}; ++#endif ++ + static int sctp_getsockopt_connectx3(struct sock* sk, int len, + char __user *optval, + int __user *optlen) +@@ -1382,16 +1391,30 @@ static int sctp_getsockopt_connectx3(str + sctp_assoc_t assoc_id = 0; + int err = 0; + +- if (len < sizeof(param)) +- return -EINVAL; ++#ifdef CONFIG_COMPAT ++ if (is_compat_task()) { ++ struct compat_sctp_getaddrs_old param32; + +- if (copy_from_user(¶m, optval, sizeof(param))) +- return -EFAULT; ++ if (len < sizeof(param32)) ++ return -EINVAL; ++ if (copy_from_user(¶m32, optval, sizeof(param32))) ++ return -EFAULT; + +- err = __sctp_setsockopt_connectx(sk, +- (struct sockaddr __user *)param.addrs, +- param.addr_num, &assoc_id); ++ param.assoc_id = param32.assoc_id; ++ param.addr_num = param32.addr_num; ++ param.addrs = compat_ptr(param32.addrs); ++ } else ++#endif ++ { ++ if (len < sizeof(param)) ++ return -EINVAL; ++ if (copy_from_user(¶m, optval, sizeof(param))) ++ return -EFAULT; ++ } + ++ err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *) ++ param.addrs, param.addr_num, ++ &assoc_id); + if (err == 0 || err == -EINPROGRESS) { + if (copy_to_user(optval, &assoc_id, sizeof(assoc_id))) + return -EFAULT; diff --git a/queue-3.13/net-use-__gfp_noretry-for-high-order-allocations.patch b/queue-3.13/net-use-__gfp_noretry-for-high-order-allocations.patch new file mode 100644 index 00000000000..e24f730b282 --- /dev/null +++ b/queue-3.13/net-use-__gfp_noretry-for-high-order-allocations.patch @@ -0,0 +1,75 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Eric Dumazet +Date: Thu, 6 Feb 2014 10:42:42 -0800 +Subject: net: use __GFP_NORETRY for high order allocations + +From: Eric Dumazet + +[ Upstream commit ed98df3361f059db42786c830ea96e2d18b8d4db ] + +sock_alloc_send_pskb() & sk_page_frag_refill() +have a loop trying high order allocations to prepare +skb with low number of fragments as this increases performance. + +Problem is that under memory pressure/fragmentation, this can +trigger OOM while the intent was only to try the high order +allocations, then fallback to order-0 allocations. + +We had various reports from unexpected regressions. + +According to David, setting __GFP_NORETRY should be fine, +as the asynchronous compaction is still enabled, and this +will prevent OOM from kicking as in : + +CFSClientEventm invoked oom-killer: gfp_mask=0x42d0, order=3, oom_adj=0, +oom_score_adj=0, oom_score_badness=2 (enabled),memcg_scoring=disabled +CFSClientEventm + +Call Trace: + [] dump_header+0xe1/0x23e + [] oom_kill_process+0x6a/0x323 + [] out_of_memory+0x4b3/0x50d + [] __alloc_pages_may_oom+0xa2/0xc7 + [] __alloc_pages_nodemask+0x1002/0x17f0 + [] alloc_pages_current+0x103/0x2b0 + [] sk_page_frag_refill+0x8f/0x160 + [] tcp_sendmsg+0x560/0xee0 + [] inet_sendmsg+0x67/0x100 + [] __sock_sendmsg_nosec+0x6c/0x90 + [] sock_sendmsg+0xc5/0xf0 + [] __sys_sendmsg+0x136/0x430 + [] sys_sendmsg+0x88/0x110 + [] system_call_fastpath+0x16/0x1b +Out of Memory: Kill process 2856 (bash) score 9999 or sacrifice child + +Signed-off-by: Eric Dumazet +Acked-by: David Rientjes +Acked-by: "Eric W. Biederman" +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1800,7 +1800,9 @@ struct sk_buff *sock_alloc_send_pskb(str + while (order) { + if (npages >= 1 << order) { + page = alloc_pages(sk->sk_allocation | +- __GFP_COMP | __GFP_NOWARN, ++ __GFP_COMP | ++ __GFP_NOWARN | ++ __GFP_NORETRY, + order); + if (page) + goto fill_page; +@@ -1872,7 +1874,7 @@ bool skb_page_frag_refill(unsigned int s + gfp_t gfp = prio; + + if (order) +- gfp |= __GFP_COMP | __GFP_NOWARN; ++ gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; + pfrag->page = alloc_pages(gfp, order); + if (likely(pfrag->page)) { + pfrag->offset = 0; diff --git a/queue-3.13/netpoll-fix-netconsole-ipv6-setup.patch b/queue-3.13/netpoll-fix-netconsole-ipv6-setup.patch new file mode 100644 index 00000000000..2324fc03e5c --- /dev/null +++ b/queue-3.13/netpoll-fix-netconsole-ipv6-setup.patch @@ -0,0 +1,52 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Sabrina Dubroca +Date: Thu, 6 Feb 2014 18:34:12 +0100 +Subject: netpoll: fix netconsole IPv6 setup + +From: Sabrina Dubroca + +[ Upstream commit 00fe11b3c67dc670fe6391d22f1fe64e7c99a8ec ] + +Currently, to make netconsole start over IPv6, the source address +needs to be specified. Without a source address, netpoll_parse_options +assumes we're setting up over IPv4 and the destination IPv6 address is +rejected. + +Check if the IP version has been forced by a source address before +checking for a version mismatch when parsing the destination address. + +Signed-off-by: Sabrina Dubroca +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/netpoll.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/core/netpoll.c ++++ b/net/core/netpoll.c +@@ -948,6 +948,7 @@ int netpoll_parse_options(struct netpoll + { + char *cur=opt, *delim; + int ipv6; ++ bool ipversion_set = false; + + if (*cur != '@') { + if ((delim = strchr(cur, '@')) == NULL) +@@ -960,6 +961,7 @@ int netpoll_parse_options(struct netpoll + cur++; + + if (*cur != '/') { ++ ipversion_set = true; + if ((delim = strchr(cur, '/')) == NULL) + goto parse_failed; + *delim = 0; +@@ -1002,7 +1004,7 @@ int netpoll_parse_options(struct netpoll + ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); + if (ipv6 < 0) + goto parse_failed; +- else if (np->ipv6 != (bool)ipv6) ++ else if (ipversion_set && np->ipv6 != (bool)ipv6) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; diff --git a/queue-3.13/series b/queue-3.13/series index 04ddf68ecf4..7068abdbf49 100644 --- a/queue-3.13/series +++ b/queue-3.13/series @@ -33,3 +33,40 @@ rtlwifi-fix-incorrect-return-from-rtl_ps_enable_nic.patch rtlwifi-rtl8192ce-fix-too-long-disable-of-irqs.patch nfs-do-not-set-nfs_ino_invalid_label-unless-server-supports-labeled-nfs.patch nfs-fix-error-return-in-nfs4_select_rw_stateid.patch +6lowpan-fix-lockdep-splats.patch +9p-trans_virtio.c-fix-broken-zero-copy-on-vmalloc-buffers.patch +bridge-fix-netconsole-setup-over-bridge.patch +can-add-destructor-for-self-generated-skbs.patch +ipv4-fix-runtime-warning-in-rtmsg_ifa.patch +net-fix-ip-rule-iif-oif-device-rename.patch +netpoll-fix-netconsole-ipv6-setup.patch +net-qmi_wwan-add-netgear-aircard-340u.patch +tcp-tsq-fix-nonagle-handling.patch +tg3-fix-deadlock-in-tg3_change_mtu.patch +vhost-fix-ref-cnt-checking-deadlock.patch +hyperv-fix-the-carrier-status-setting.patch +net-asix-add-missing-flag-to-struct-driver_info.patch 
+gre-add-link-local-route-when-local-addr-is-any.patch +usbnet-remove-generic-hard_header_len-check.patch +bonding-802.3ad-make-aggregator_identifier-bond-private.patch +ipv4-fix-counter-in_slow_tot.patch +net-sctp-fix-sctp_connectx-abi-for-ia32-emulation-compat-mode.patch +net-add-and-use-skb_gso_transport_seglen.patch +net-core-introduce-netif_skb_dev_features.patch +net-ip-ipv6-handle-gso-skbs-in-forwarding-path.patch +net-mvneta-increase-the-64-bit-rx-tx-stats-out-of-the-hot-path.patch +net-mvneta-use-per_cpu-stats-to-fix-an-smp-lock-up.patch +net-mvneta-do-not-schedule-in-mvneta_tx_timeout.patch +net-mvneta-add-missing-bit-descriptions-for-interrupt-masks-and-causes.patch +net-mvneta-replace-tx-timer-with-a-real-interrupt.patch +net-use-__gfp_noretry-for-high-order-allocations.patch +batman-adv-fix-soft-interface-mtu-computation.patch +batman-adv-fix-tt-tvlv-parsing-on-ogm-reception.patch +batman-adv-release-vlan-object-after-checking-the-crc.patch +batman-adv-properly-check-pskb_may_pull-return-value.patch +batman-adv-avoid-potential-race-condition-when-adding-a-new-neighbour.patch +batman-adv-fix-potential-orig_node-reference-leak.patch +batman-adv-fix-tt-crc-computation-by-ensuring-byte-order.patch +batman-adv-free-skb-on-tvlv-parsing-success.patch +batman-adv-avoid-double-free-when-orig_node-initialization-fails.patch +batman-adv-fix-potential-kernel-paging-error-for-unicast-transmissions.patch diff --git a/queue-3.13/tcp-tsq-fix-nonagle-handling.patch b/queue-3.13/tcp-tsq-fix-nonagle-handling.patch new file mode 100644 index 00000000000..3c813e09f1f --- /dev/null +++ b/queue-3.13/tcp-tsq-fix-nonagle-handling.patch @@ -0,0 +1,68 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: John Ogness +Date: Sun, 9 Feb 2014 18:40:11 -0800 +Subject: tcp: tsq: fix nonagle handling + +From: John Ogness + +[ Upstream commit bf06200e732de613a1277984bf34d1a21c2de03d ] + +Commit 46d3ceabd8d9 ("tcp: TCP Small Queues") introduced a possible +regression for applications using TCP_NODELAY. + +If TCP session is throttled because of tsq, we should consult +tp->nonagle when TX completion is done and allow us to send additional +segment, especially if this segment is not a full MSS. +Otherwise this segment is sent after an RTO. + +[edumazet] : Cooked the changelog, added another fix about testing +sk_wmem_alloc twice because TX completion can happen right before +setting TSQ_THROTTLED bit. + +This problem is particularly visible with recent auto corking, +but might also be triggered with low tcp_limit_output_bytes +values or NIC drivers delaying TX completion by hundred of usec, +and very low rtt. + +Thomas Glanzmann for example reported an iscsi regression, caused +by tcp auto corking making this bug quite visible. + +Fixes: 46d3ceabd8d9 ("tcp: TCP Small Queues") +Signed-off-by: John Ogness +Signed-off-by: Eric Dumazet +Reported-by: Thomas Glanzmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -696,7 +696,8 @@ static void tcp_tsq_handler(struct sock + if ((1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | + TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) +- tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); ++ tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle, ++ 0, GFP_ATOMIC); + } + /* + * One tasklest per cpu tries to send more skbs. 
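+ * (each tasklet walks its cpu's list of tsq-throttled sockets and
+ * retries their transmits from softirq context)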
+@@ -1884,7 +1885,15 @@ static bool tcp_write_xmit(struct sock * + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { + set_bit(TSQ_THROTTLED, &tp->tsq_flags); +- break; ++ /* It is possible TX completion already happened ++ * before we set TSQ_THROTTLED, so we must ++ * test again the condition. ++ * We abuse smp_mb__after_clear_bit() because ++ * there is no smp_mb__after_set_bit() yet ++ */ ++ smp_mb__after_clear_bit(); ++ if (atomic_read(&sk->sk_wmem_alloc) > limit) ++ break; + } + + limit = mss_now; diff --git a/queue-3.13/tg3-fix-deadlock-in-tg3_change_mtu.patch b/queue-3.13/tg3-fix-deadlock-in-tg3_change_mtu.patch new file mode 100644 index 00000000000..91b4127f8b6 --- /dev/null +++ b/queue-3.13/tg3-fix-deadlock-in-tg3_change_mtu.patch @@ -0,0 +1,49 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Nithin Sujir +Date: Thu, 6 Feb 2014 14:13:05 -0800 +Subject: tg3: Fix deadlock in tg3_change_mtu() + +From: Nithin Sujir + +[ Upstream commit c6993dfd7db9b0c6b7ca7503a56fda9236a4710f ] + +Quoting David Vrabel - +"5780 cards cannot have jumbo frames and TSO enabled together. When +jumbo frames are enabled by setting the MTU, the TSO feature must be +cleared. This is done indirectly by calling netdev_update_features() +which will call tg3_fix_features() to actually clear the flags. + +netdev_update_features() will also trigger a new netlink message for the +feature change event which will result in a call to tg3_get_stats64() +which deadlocks on the tg3 lock." + +tg3_set_mtu() does not need to be under the tg3 lock since converting +the flags to use set_bit(). Move it out to after tg3_netif_stop(). + +Reported-by: David Vrabel +Tested-by: David Vrabel +Signed-off-by: Michael Chan +Signed-off-by: Nithin Nayak Sujir +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/tg3.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -13965,12 +13965,12 @@ static int tg3_change_mtu(struct net_dev + + tg3_netif_stop(tp); + ++ tg3_set_mtu(dev, tp, new_mtu); ++ + tg3_full_lock(tp, 1); + + tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); + +- tg3_set_mtu(dev, tp, new_mtu); +- + /* Reset PHY, otherwise the read DMA engine will be in a mode that + * breaks all requests to 256 bytes. + */ diff --git a/queue-3.13/usbnet-remove-generic-hard_header_len-check.patch b/queue-3.13/usbnet-remove-generic-hard_header_len-check.patch new file mode 100644 index 00000000000..5c92801ba6e --- /dev/null +++ b/queue-3.13/usbnet-remove-generic-hard_header_len-check.patch @@ -0,0 +1,211 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: Emil Goode +Date: Thu, 13 Feb 2014 17:50:19 +0100 +Subject: usbnet: remove generic hard_header_len check + +From: Emil Goode + +[ Upstream commit eb85569fe2d06c2fbf4de7b66c263ca095b397aa ] + +This patch removes a generic hard_header_len check from the usbnet +module that is causing dropped packages under certain circumstances +for devices that send rx packets that cross urb boundaries. + +One example is the AX88772B which occasionally send rx packets that +cross urb boundaries where the remaining partial packet is sent with +no hardware header. When the buffer with a partial packet is of less +number of octets than the value of hard_header_len the buffer is +discarded by the usbnet module. + +With AX88772B this can be reproduced by using ping with a packet +size between 1965-1976. 
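+
+In short, after this change the generic path keeps only a minimal
+protocol-level sanity check, sketched here from the rx_process() hunk
+below, while drivers that set FLAG_MULTI_PACKET must validate frames
+in their rx_fixup() before calling usbnet_skb_return():
+
+	if (skb->len < ETH_HLEN) {
+		dev->net->stats.rx_errors++;
+		dev->net->stats.rx_length_errors++;
+	} else {
+		usbnet_skb_return(dev, skb);
+	}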
+ +The bug has been reported here: + +https://bugzilla.kernel.org/show_bug.cgi?id=29082 + +This patch introduces the following changes: +- Removes the generic hard_header_len check in the rx_complete + function in the usbnet module. +- Introduces a ETH_HLEN check for skbs that are not cloned from + within a rx_fixup callback. +- For safety a hard_header_len check is added to each rx_fixup + callback function that could be affected by this change. + These extra checks could possibly be removed by someone + who has the hardware to test. +- Removes a call to dev_kfree_skb_any() and instead utilizes the + dev->done list to queue skbs for cleanup. + +The changes place full responsibility on the rx_fixup callback +functions that clone skbs to only pass valid skbs to the +usbnet_skb_return function. + +Signed-off-by: Emil Goode +Reported-by: Igor Gnatenko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/ax88179_178a.c | 4 ++++ + drivers/net/usb/gl620a.c | 4 ++++ + drivers/net/usb/mcs7830.c | 5 +++-- + drivers/net/usb/net1080.c | 4 ++++ + drivers/net/usb/qmi_wwan.c | 8 ++++---- + drivers/net/usb/rndis_host.c | 4 ++++ + drivers/net/usb/smsc75xx.c | 4 ++++ + drivers/net/usb/smsc95xx.c | 4 ++++ + drivers/net/usb/usbnet.c | 25 ++++++++++--------------- + 9 files changed, 41 insertions(+), 21 deletions(-) + +--- a/drivers/net/usb/ax88179_178a.c ++++ b/drivers/net/usb/ax88179_178a.c +@@ -1119,6 +1119,10 @@ static int ax88179_rx_fixup(struct usbne + u16 hdr_off; + u32 *pkt_hdr; + ++ /* This check is no longer done by usbnet */ ++ if (skb->len < dev->net->hard_header_len) ++ return 0; ++ + skb_trim(skb, skb->len - 4); + memcpy(&rx_hdr, skb_tail_pointer(skb), 4); + le32_to_cpus(&rx_hdr); +--- a/drivers/net/usb/gl620a.c ++++ b/drivers/net/usb/gl620a.c +@@ -86,6 +86,10 @@ static int genelink_rx_fixup(struct usbn + u32 size; + u32 count; + ++ /* This check is no longer done by usbnet */ ++ if (skb->len < dev->net->hard_header_len) ++ return 0; ++ + header = (struct gl_header *) skb->data; + + // get the packet count of the received skb +--- a/drivers/net/usb/mcs7830.c ++++ b/drivers/net/usb/mcs7830.c +@@ -528,8 +528,9 @@ static int mcs7830_rx_fixup(struct usbne + { + u8 status; + +- if (skb->len == 0) { +- dev_err(&dev->udev->dev, "unexpected empty rx frame\n"); ++ /* This check is no longer done by usbnet */ ++ if (skb->len < dev->net->hard_header_len) { ++ dev_err(&dev->udev->dev, "unexpected tiny rx frame\n"); + return 0; + } + +--- a/drivers/net/usb/net1080.c ++++ b/drivers/net/usb/net1080.c +@@ -366,6 +366,10 @@ static int net1080_rx_fixup(struct usbne + struct nc_trailer *trailer; + u16 hdr_len, packet_len; + ++ /* This check is no longer done by usbnet */ ++ if (skb->len < dev->net->hard_header_len) ++ return 0; ++ + if (!(skb->len & 0x01)) { + netdev_dbg(dev->net, "rx framesize %d range %d..%d mtu %d\n", + skb->len, dev->net->hard_header_len, dev->hard_mtu, +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -80,10 +80,10 @@ static int qmi_wwan_rx_fixup(struct usbn + { + __be16 proto; + +- /* usbnet rx_complete guarantees that skb->len is at least +- * hard_header_len, so we can inspect the dest address without +- * checking skb->len +- */ ++ /* This check is no longer done by usbnet */ ++ if (skb->len < dev->net->hard_header_len) ++ return 0; ++ + switch (skb->data[0] & 0xf0) { + case 0x40: + proto = htons(ETH_P_IP); +--- a/drivers/net/usb/rndis_host.c ++++ b/drivers/net/usb/rndis_host.c +@@ -494,6 +494,10 @@ 
EXPORT_SYMBOL_GPL(rndis_unbind); + */ + int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb) + { ++ /* This check is no longer done by usbnet */ ++ if (skb->len < dev->net->hard_header_len) ++ return 0; ++ + /* peripheral may have batched packets to us... */ + while (likely(skb->len)) { + struct rndis_data_hdr *hdr = (void *)skb->data; +--- a/drivers/net/usb/smsc75xx.c ++++ b/drivers/net/usb/smsc75xx.c +@@ -2108,6 +2108,10 @@ static void smsc75xx_rx_csum_offload(str + + static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) + { ++ /* This check is no longer done by usbnet */ ++ if (skb->len < dev->net->hard_header_len) ++ return 0; ++ + while (skb->len > 0) { + u32 rx_cmd_a, rx_cmd_b, align_count, size; + struct sk_buff *ax_skb; +--- a/drivers/net/usb/smsc95xx.c ++++ b/drivers/net/usb/smsc95xx.c +@@ -1725,6 +1725,10 @@ static void smsc95xx_rx_csum_offload(str + + static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) + { ++ /* This check is no longer done by usbnet */ ++ if (skb->len < dev->net->hard_header_len) ++ return 0; ++ + while (skb->len > 0) { + u32 header, align_count; + struct sk_buff *ax_skb; +--- a/drivers/net/usb/usbnet.c ++++ b/drivers/net/usb/usbnet.c +@@ -543,17 +543,19 @@ static inline void rx_process (struct us + } + // else network stack removes extra byte if we forced a short packet + +- if (skb->len) { +- /* all data was already cloned from skb inside the driver */ +- if (dev->driver_info->flags & FLAG_MULTI_PACKET) +- dev_kfree_skb_any(skb); +- else +- usbnet_skb_return(dev, skb); ++ /* all data was already cloned from skb inside the driver */ ++ if (dev->driver_info->flags & FLAG_MULTI_PACKET) ++ goto done; ++ ++ if (skb->len < ETH_HLEN) { ++ dev->net->stats.rx_errors++; ++ dev->net->stats.rx_length_errors++; ++ netif_dbg(dev, rx_err, dev->net, "rx length %d\n", skb->len); ++ } else { ++ usbnet_skb_return(dev, skb); + return; + } + +- netif_dbg(dev, rx_err, dev->net, "drop\n"); +- dev->net->stats.rx_errors++; + done: + skb_queue_tail(&dev->done, skb); + } +@@ -575,13 +577,6 @@ static void rx_complete (struct urb *urb + switch (urb_status) { + /* success */ + case 0: +- if (skb->len < dev->net->hard_header_len) { +- state = rx_cleanup; +- dev->net->stats.rx_errors++; +- dev->net->stats.rx_length_errors++; +- netif_dbg(dev, rx_err, dev->net, +- "rx length %d\n", skb->len); +- } + break; + + /* stalls need manual reset. this is rare ... except that diff --git a/queue-3.13/vhost-fix-ref-cnt-checking-deadlock.patch b/queue-3.13/vhost-fix-ref-cnt-checking-deadlock.patch new file mode 100644 index 00000000000..b8fe6398643 --- /dev/null +++ b/queue-3.13/vhost-fix-ref-cnt-checking-deadlock.patch @@ -0,0 +1,141 @@ +From foo@baz Thu Feb 27 20:11:26 PST 2014 +From: "Michael S. Tsirkin" +Date: Thu, 13 Feb 2014 11:42:05 +0200 +Subject: vhost: fix ref cnt checking deadlock + +From: "Michael S. Tsirkin" + +[ Upstream commit 0ad8b480d6ee916aa84324f69acf690142aecd0e ] + +vhost checked the counter within the refcnt before decrementing. It +really wanted to know that it is the one that has the last reference, as +a way to batch freeing resources a bit more efficiently. + +Note: we only let refcount go to 0 on device release. + +This works well but we now access the ref counter twice so there's a +race: all users might see a high count and decide to defer freeing +resources. +In the end no one initiates freeing resources until the last reference +is gone (which is on VM shotdown so might happen after a looooong time). 
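+
+Condensed from the code being replaced below, the racy pattern was a
+read followed by a separate put; two completions racing here can both
+observe a high count and both skip the wakeup:
+
+	int cnt = atomic_read(&ubufs->kref.refcount);	/* both CPUs read 3 */
+
+	vhost_net_ubuf_put(ubufs);			/* count drops to 1 */
+	if (cnt <= 2 || !(cnt % 16))			/* false on both cpus */
+		vhost_poll_queue(&vq->poll);		/* wakeup never fires */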
+
+Let's do what we probably should have done straight away:
+switch from kref to a plain atomic, documenting the
+semantics, return the refcount value atomically after decrement,
+then use that to avoid the deadlock.
+
+Reported-by: Qin Chuanyu
+Signed-off-by: Michael S. Tsirkin
+Acked-by: Jason Wang
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
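As an illustration only, the race and the fix can be modeled in
userspace C11 with <stdatomic.h>. put_racy() and put_fixed() below are
hypothetical stand-ins for the old and new vhost_net_ubuf_put(), not
the kernel code; the wakeup side is omitted.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int refcount;

/* The pattern the patch removes: the counter is read and decremented
 * in two separate steps, so concurrent callers can all observe a high
 * count and all decide to defer the wakeup/polling work. */
static int put_racy(void)
{
	int cnt = atomic_load(&refcount);	/* step 1: read (may go stale) */
	atomic_fetch_sub(&refcount, 1);		/* step 2: decrement */
	return cnt;
}

/* The fixed pattern: one atomic op yields the post-decrement value,
 * so exactly one caller observes each value and the last-reference
 * case cannot be missed; this mirrors atomic_sub_return(). */
static int put_fixed(void)
{
	return atomic_fetch_sub(&refcount, 1) - 1;
}

int main(void)
{
	atomic_store(&refcount, 2);
	printf("racy put saw %d\n", put_racy());	/* pre-decrement value */
	atomic_store(&refcount, 2);
	printf("fixed put returned %d\n", put_fixed());	/* post-decrement value */
	return 0;
}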
+---
+ drivers/vhost/net.c |   41 ++++++++++++++++++++---------------------
+ 1 file changed, 20 insertions(+), 21 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -70,7 +70,12 @@ enum {
+ };
+ 
+ struct vhost_net_ubuf_ref {
+-	struct kref kref;
++	/* refcount follows semantics similar to kref:
++	 *  0: object is released
++	 *  1: no outstanding ubufs
++	 * >1: outstanding ubufs
++	 */
++	atomic_t refcount;
+ 	wait_queue_head_t wait;
+ 	struct vhost_virtqueue *vq;
+ };
+@@ -116,14 +121,6 @@ static void vhost_net_enable_zcopy(int v
+ 	vhost_net_zcopy_mask |= 0x1 << vq;
+ }
+ 
+-static void vhost_net_zerocopy_done_signal(struct kref *kref)
+-{
+-	struct vhost_net_ubuf_ref *ubufs;
+-
+-	ubufs = container_of(kref, struct vhost_net_ubuf_ref, kref);
+-	wake_up(&ubufs->wait);
+-}
+-
+ static struct vhost_net_ubuf_ref *
+ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy)
+ {
+@@ -134,21 +131,24 @@ vhost_net_ubuf_alloc(struct vhost_virtqu
+ 	ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL);
+ 	if (!ubufs)
+ 		return ERR_PTR(-ENOMEM);
+-	kref_init(&ubufs->kref);
++	atomic_set(&ubufs->refcount, 1);
+ 	init_waitqueue_head(&ubufs->wait);
+ 	ubufs->vq = vq;
+ 	return ubufs;
+ }
+ 
+-static void vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs)
++static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs)
+ {
+-	kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal);
++	int r = atomic_sub_return(1, &ubufs->refcount);
++	if (unlikely(!r))
++		wake_up(&ubufs->wait);
++	return r;
+ }
+ 
+ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs)
+ {
+-	kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal);
+-	wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
++	vhost_net_ubuf_put(ubufs);
++	wait_event(ubufs->wait, !atomic_read(&ubufs->refcount));
+ }
+ 
+ static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs)
+@@ -306,22 +306,21 @@ static void vhost_zerocopy_callback(stru
+ {
+ 	struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
+ 	struct vhost_virtqueue *vq = ubufs->vq;
+-	int cnt = atomic_read(&ubufs->kref.refcount);
++	int cnt;
+ 
+ 	/* set len to mark this desc buffers done DMA */
+ 	vq->heads[ubuf->desc].len = success ?
+ 		VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
+-	vhost_net_ubuf_put(ubufs);
++	cnt = vhost_net_ubuf_put(ubufs);
+ 
+ 	/*
+ 	 * Trigger polling thread if guest stopped submitting new buffers:
+-	 * in this case, the refcount after decrement will eventually reach 1
+-	 * so here it is 2.
++	 * in this case, the refcount after decrement will eventually reach 1.
+ 	 * We also trigger polling periodically after each 16 packets
+ 	 * (the value 16 here is more or less arbitrary, it's tuned to trigger
+ 	 * less than 10% of times).
+ 	 */
+-	if (cnt <= 2 || !(cnt % 16))
++	if (cnt <= 1 || !(cnt % 16))
+ 		vhost_poll_queue(&vq->poll);
+ }
+ 
+@@ -420,7 +419,7 @@ static void handle_tx(struct vhost_net *
+ 			msg.msg_control = ubuf;
+ 			msg.msg_controllen = sizeof(ubuf);
+ 			ubufs = nvq->ubufs;
+-			kref_get(&ubufs->kref);
++			atomic_inc(&ubufs->refcount);
+ 			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
+ 		} else {
+ 			msg.msg_control = NULL;
+@@ -785,7 +784,7 @@ static void vhost_net_flush(struct vhost
+ 		vhost_net_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs);
+ 		mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
+ 		n->tx_flush = false;
+-		kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref);
++		atomic_set(&n->vqs[VHOST_NET_VQ_TX].ubufs->refcount, 1);
+ 		mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
+ 	}
+ }
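For illustration, the adjusted polling trigger can be rendered as a
standalone predicate. should_poll() is a hypothetical stand-in for the
test in vhost_zerocopy_callback(): cnt is the value returned by the
atomic decrement, so under the documented semantics cnt <= 1 means we
may hold the last outstanding ubuf, and every 16th completion polls
anyway.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the trigger condition after the fix. */
static bool should_poll(int cnt)
{
	return cnt <= 1 || !(cnt % 16);
}

int main(void)
{
	/* walk plausible post-decrement values and show when we'd poll */
	for (int cnt = 1; cnt <= 48; cnt++)
		if (should_poll(cnt))
			printf("cnt=%d -> vhost_poll_queue()\n", cnt);
	return 0;
}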