3.18-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 18 Jan 2015 02:19:31 +0000 (18:19 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 18 Jan 2015 02:19:31 +0000 (18:19 -0800)
added patches:
alx-fix-alx_poll.patch
batman-adv-avoid-null-dereferences-and-fix-if-check.patch
batman-adv-calculate-extra-tail-size-based-on-queued-fragments.patch
batman-adv-unify-fragment-size-calculation.patch
enic-fix-rx-skb-checksum.patch
geneve-fix-races-between-socket-add-and-release.patch
geneve-remove-socket-and-offload-handlers-at-destruction.patch
gre-fix-the-inner-mac-header-in-nbma-tunnel-xmit-path.patch
in6-fix-conflict-with-glibc.patch
net-core-handle-csum-for-checksum_complete-vxlan-forwarding.patch
net-drop-the-packet-when-fails-to-do-software-segmentation-or-header-check.patch
net-fix-stacked-vlan-offload-features-computation.patch
net-generalize-ndo_gso_check-to-ndo_features_check.patch
net-mlx4-cache-line-cqe-eqe-stride-fixes.patch
net-mlx4_core-correcly-update-the-mtt-s-offset-in-the-mr-re-reg-flow.patch
net-mlx4_en-doorbell-is-byteswapped-in-little-endian-archs.patch
net-reset-secmark-when-scrubbing-packet.patch
netlink-always-copy-on-mmap-tx.patch
netlink-don-t-reorder-loads-stores-before-marking-mmap-netlink-frame-as-available.patch
tcp-do-not-apply-tso-segment-limit-to-non-tso-packets.patch
tcp6-don-t-move-ip6cb-before-xfrm6_policy_check.patch
team-avoid-possible-underflow-of-count_pending-value-for-notify_peers-and-mcast_rejoin.patch
tg3-tg3_disable_ints-using-uninitialized-mailbox-value-to-disable-interrupts.patch
xen-netback-fixing-the-propagation-of-the-transmit-shaper-timeout.patch
xen-netback-support-frontends-without-feature-rx-notify-again.patch

25 files changed:
queue-3.18/alx-fix-alx_poll.patch [new file with mode: 0644]
queue-3.18/batman-adv-avoid-null-dereferences-and-fix-if-check.patch [new file with mode: 0644]
queue-3.18/batman-adv-calculate-extra-tail-size-based-on-queued-fragments.patch [new file with mode: 0644]
queue-3.18/batman-adv-unify-fragment-size-calculation.patch [new file with mode: 0644]
queue-3.18/enic-fix-rx-skb-checksum.patch [new file with mode: 0644]
queue-3.18/geneve-fix-races-between-socket-add-and-release.patch [new file with mode: 0644]
queue-3.18/geneve-remove-socket-and-offload-handlers-at-destruction.patch [new file with mode: 0644]
queue-3.18/gre-fix-the-inner-mac-header-in-nbma-tunnel-xmit-path.patch [new file with mode: 0644]
queue-3.18/in6-fix-conflict-with-glibc.patch [new file with mode: 0644]
queue-3.18/net-core-handle-csum-for-checksum_complete-vxlan-forwarding.patch [new file with mode: 0644]
queue-3.18/net-drop-the-packet-when-fails-to-do-software-segmentation-or-header-check.patch [new file with mode: 0644]
queue-3.18/net-fix-stacked-vlan-offload-features-computation.patch [new file with mode: 0644]
queue-3.18/net-generalize-ndo_gso_check-to-ndo_features_check.patch [new file with mode: 0644]
queue-3.18/net-mlx4-cache-line-cqe-eqe-stride-fixes.patch [new file with mode: 0644]
queue-3.18/net-mlx4_core-correcly-update-the-mtt-s-offset-in-the-mr-re-reg-flow.patch [new file with mode: 0644]
queue-3.18/net-mlx4_en-doorbell-is-byteswapped-in-little-endian-archs.patch [new file with mode: 0644]
queue-3.18/net-reset-secmark-when-scrubbing-packet.patch [new file with mode: 0644]
queue-3.18/netlink-always-copy-on-mmap-tx.patch [new file with mode: 0644]
queue-3.18/netlink-don-t-reorder-loads-stores-before-marking-mmap-netlink-frame-as-available.patch [new file with mode: 0644]
queue-3.18/tcp-do-not-apply-tso-segment-limit-to-non-tso-packets.patch [new file with mode: 0644]
queue-3.18/tcp6-don-t-move-ip6cb-before-xfrm6_policy_check.patch [new file with mode: 0644]
queue-3.18/team-avoid-possible-underflow-of-count_pending-value-for-notify_peers-and-mcast_rejoin.patch [new file with mode: 0644]
queue-3.18/tg3-tg3_disable_ints-using-uninitialized-mailbox-value-to-disable-interrupts.patch [new file with mode: 0644]
queue-3.18/xen-netback-fixing-the-propagation-of-the-transmit-shaper-timeout.patch [new file with mode: 0644]
queue-3.18/xen-netback-support-frontends-without-feature-rx-notify-again.patch [new file with mode: 0644]

diff --git a/queue-3.18/alx-fix-alx_poll.patch b/queue-3.18/alx-fix-alx_poll.patch
new file mode 100644 (file)
index 0000000..9524923
--- /dev/null
@@ -0,0 +1,110 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 11 Jan 2015 10:32:18 -0800
+Subject: alx: fix alx_poll()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7a05dc64e2e4c611d89007b125b20c0d2a4d31a5 ]
+
+Commit d75b1ade567f ("net: less interrupt masking in NAPI") uncovered
+wrong alx_poll() behavior.
+
+A NAPI poll() handler is supposed to return exactly the budget when/if
+napi_complete() has not been called.
+
+It is also supposed to return number of frames that were received, so
+that netdev_budget can have a meaning.
+
+Also, in case of TX pressure, we still have to dequeue received
+packets: alx_clean_rx_irq() has to be called even if
+alx_clean_tx_irq(alx) returns false, otherwise the device is half duplex.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Fixes: d75b1ade567f ("net: less interrupt masking in NAPI")
+Reported-by: Oded Gabbay <oded.gabbay@amd.com>
+Bisected-by: Oded Gabbay <oded.gabbay@amd.com>
+Tested-by: Oded Gabbay <oded.gabbay@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/atheros/alx/main.c |   24 +++++++++++++-----------
+ 1 file changed, 13 insertions(+), 11 deletions(-)
+
+--- a/drivers/net/ethernet/atheros/alx/main.c
++++ b/drivers/net/ethernet/atheros/alx/main.c
+@@ -184,15 +184,16 @@ static void alx_schedule_reset(struct al
+       schedule_work(&alx->reset_wk);
+ }
+-static bool alx_clean_rx_irq(struct alx_priv *alx, int budget)
++static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
+ {
+       struct alx_rx_queue *rxq = &alx->rxq;
+       struct alx_rrd *rrd;
+       struct alx_buffer *rxb;
+       struct sk_buff *skb;
+       u16 length, rfd_cleaned = 0;
++      int work = 0;
+-      while (budget > 0) {
++      while (work < budget) {
+               rrd = &rxq->rrd[rxq->rrd_read_idx];
+               if (!(rrd->word3 & cpu_to_le32(1 << RRD_UPDATED_SHIFT)))
+                       break;
+@@ -203,7 +204,7 @@ static bool alx_clean_rx_irq(struct alx_
+                   ALX_GET_FIELD(le32_to_cpu(rrd->word0),
+                                 RRD_NOR) != 1) {
+                       alx_schedule_reset(alx);
+-                      return 0;
++                      return work;
+               }
+               rxb = &rxq->bufs[rxq->read_idx];
+@@ -243,7 +244,7 @@ static bool alx_clean_rx_irq(struct alx_
+               }
+               napi_gro_receive(&alx->napi, skb);
+-              budget--;
++              work++;
+ next_pkt:
+               if (++rxq->read_idx == alx->rx_ringsz)
+@@ -258,21 +259,22 @@ next_pkt:
+       if (rfd_cleaned)
+               alx_refill_rx_ring(alx, GFP_ATOMIC);
+-      return budget > 0;
++      return work;
+ }
+ static int alx_poll(struct napi_struct *napi, int budget)
+ {
+       struct alx_priv *alx = container_of(napi, struct alx_priv, napi);
+       struct alx_hw *hw = &alx->hw;
+-      bool complete = true;
+       unsigned long flags;
++      bool tx_complete;
++      int work;
+-      complete = alx_clean_tx_irq(alx) &&
+-                 alx_clean_rx_irq(alx, budget);
++      tx_complete = alx_clean_tx_irq(alx);
++      work = alx_clean_rx_irq(alx, budget);
+-      if (!complete)
+-              return 1;
++      if (!tx_complete || work == budget)
++              return budget;
+       napi_complete(&alx->napi);
+@@ -284,7 +286,7 @@ static int alx_poll(struct napi_struct *
+       alx_post_write(hw);
+-      return 0;
++      return work;
+ }
+ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
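
For illustration only, a minimal standalone C model (not kernel code; the example_* names are made up) of the NAPI poll() contract that the fix above restores: count the frames actually received, keep draining RX even under TX pressure, return exactly the budget whenever completion is not signalled, and otherwise return the real work done.

    /* Schematic model of NAPI poll() budget accounting, compilable as
     * plain C. It mirrors the fixed alx_poll() control flow, not its code. */
    #include <stdbool.h>
    #include <stdio.h>

    static int pending_rx = 37;        /* pretend packets waiting in the RX ring */
    static bool tx_ring_clean = true;  /* pretend TX completion state */

    static int example_clean_rx(int budget)
    {
        int work = 0;

        while (work < budget && pending_rx > 0) {  /* never exceed the budget */
            pending_rx--;
            work++;
        }
        return work;                               /* report frames received */
    }

    static int example_poll(int budget)
    {
        bool tx_complete = tx_ring_clean;
        int work = example_clean_rx(budget);       /* RX is drained even if TX is busy */

        if (!tx_complete || work == budget)
            return budget;                         /* not done: must return the full budget */

        /* napi_complete() and interrupt re-enable would happen here */
        return work;                               /* done: return the work actually performed */
    }

    int main(void)
    {
        printf("poll -> %d (budget 16)\n", example_poll(16)); /* 16: budget exhausted */
        printf("poll -> %d (budget 64)\n", example_poll(64)); /* 21: ring drained */
        return 0;
    }
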
diff --git a/queue-3.18/batman-adv-avoid-null-dereferences-and-fix-if-check.patch b/queue-3.18/batman-adv-avoid-null-dereferences-and-fix-if-check.patch
new file mode 100644 (file)
index 0000000..ce55508
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Antonio Quartulli <antonio@meshcoding.com>
+Date: Sat, 20 Dec 2014 13:48:57 +0100
+Subject: batman-adv: avoid NULL dereferences and fix if check
+
+From: Antonio Quartulli <antonio@meshcoding.com>
+
+[ Upstream commit 0d1644919578db525b9a7b6c8197ce02adbfce26 ]
+
+Gateways having bandwidth_down equal to zero are not accepted
+at all and so are never added to the Gateway list.
+For this reason checking the bandwidth_down member in
+batadv_gw_out_of_range() is useless.
+
+This is probably a copy/paste error and this check was supposed
+to be "!gw_node" only. Moreover, the way the check is written
+now may also lead to a NULL dereference.
+
+Fix this by rewriting the if-condition properly.
+
+Introduced by 414254e342a0d58144de40c3da777521ebaeeb07
+("batman-adv: tvlv - gateway download/upload bandwidth container")
+
+Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
+Reported-by: David Binderman <dcb314@hotmail.com>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/gateway_client.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/batman-adv/gateway_client.c
++++ b/net/batman-adv/gateway_client.c
+@@ -810,7 +810,7 @@ bool batadv_gw_out_of_range(struct batad
+               goto out;
+       gw_node = batadv_gw_node_get(bat_priv, orig_dst_node);
+-      if (!gw_node->bandwidth_down == 0)
++      if (!gw_node)
+               goto out;
+       switch (atomic_read(&bat_priv->gw_mode)) {
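
A standalone illustration (not from the patch) of why the replaced condition was doubly wrong: '!' binds tighter than '==', so "!gw_node->bandwidth_down == 0" dereferences a possibly NULL pointer and then compares a 0/1 boolean with 0, which is not the test that was intended. The fix simply checks the pointer itself.

    /* Operator-precedence pitfall behind the fixed check. Compilers such as
     * gcc/clang usually warn about this pattern (-Wlogical-not-parentheses). */
    #include <stdio.h>

    struct gw { unsigned int bandwidth_down; };

    int main(void)
    {
        struct gw g = { .bandwidth_down = 100 };
        struct gw *p = &g;   /* if p were NULL, the expression below would crash */

        /* Parsed as (!p->bandwidth_down) == 0, i.e. (!100) == 0 -> 0 == 0 -> 1 */
        printf("!p->bandwidth_down == 0  -> %d\n", !p->bandwidth_down == 0);

        /* The comparisons one might actually want: */
        printf("p->bandwidth_down == 0   -> %d\n", p->bandwidth_down == 0);
        printf("p != NULL                -> %d\n", p != NULL);
        return 0;
    }
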
diff --git a/queue-3.18/batman-adv-calculate-extra-tail-size-based-on-queued-fragments.patch b/queue-3.18/batman-adv-calculate-extra-tail-size-based-on-queued-fragments.patch
new file mode 100644 (file)
index 0000000..4e57a2a
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Sven Eckelmann <sven@narfation.org>
+Date: Sat, 20 Dec 2014 13:48:55 +0100
+Subject: batman-adv: Calculate extra tail size based on queued fragments
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit 5b6698b0e4a37053de35cc24ee695b98a7eb712b ]
+
+The fragmentation code was replaced in 610bfc6bc99bc83680d190ebc69359a05fc7f605
+("batman-adv: Receive fragmented packets and merge"). The new code provided a
+mostly unused parameter skb for the merging function. It is used inside the
+function to calculate the additionally needed skb tailroom. But instead of
+increasing its own tailroom, it is only increasing the tailroom of the first
+queued skb. This is not correct in some situations because the first queued
+entry can be a different one than the parameter.
+
+An observed problem was:
+
+1. packet with size 104, total_size 1464, fragno 1 was received
+   - packet is queued
+2. packet with size 1400, total_size 1464, fragno 0 was received
+   - packet is queued at the end of the list
+3. enough data was received and can be given to the merge function
+   (1464 == (1400 - 20) + (104 - 20))
+   - merge functions gets 1400 byte large packet as skb argument
+4. merge function gets first entry in queue (104 byte)
+   - stored as skb_out
+5. merge function calculates the required extra tail as total_size - skb->len
+   - pskb_expand_head tail of skb_out with 64 bytes
+6. merge function tries to squeeze the extra 1380 bytes from the second queued
+   skb (1400 byte aka skb parameter) in the 64 extra tail bytes of skb_out
+
+Instead calculate the extra required tail bytes for skb_out also using skb_out
+instead of using the parameter skb. The skb parameter is only used to get the
+total_size from the last received packet. This is also the total_size used to
+decide that all fragments were received.
+
+Reported-by: Philipp Psurek <philipp.psurek@gmail.com>
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Acked-by: Martin Hundebøll <martin@hundeboll.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/fragmentation.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/batman-adv/fragmentation.c
++++ b/net/batman-adv/fragmentation.c
+@@ -251,7 +251,7 @@ batadv_frag_merge_packets(struct hlist_h
+       kfree(entry);
+       /* Make room for the rest of the fragments. */
+-      if (pskb_expand_head(skb_out, 0, size - skb->len, GFP_ATOMIC) < 0) {
++      if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
+               kfree_skb(skb_out);
+               skb_out = NULL;
+               goto free;
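
The arithmetic behind the one-line fix, using the numbers from the report above (standalone C, not kernel code): the extra tailroom has to be computed against the queued skb_out that is being expanded, not against the skb passed in as a parameter.

    /* total_size 1464, first queued fragment (skb_out) 104 bytes,
     * fragment passed in as 'skb' 1400 bytes -- as in the report. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int total_size  = 1464;
        unsigned int skb_out_len = 104;   /* length of the buffer actually expanded */
        unsigned int skb_len     = 1400;  /* length of the parameter skb */

        /* Old code: 1464 - 1400 = 64 bytes of tailroom, far too little. */
        printf("buggy extra tail: %u bytes\n", total_size - skb_len);
        /* Fixed code: 1464 - 104 = 1360 bytes, enough for the merged packet. */
        printf("fixed extra tail: %u bytes\n", total_size - skb_out_len);
        return 0;
    }
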
diff --git a/queue-3.18/batman-adv-unify-fragment-size-calculation.patch b/queue-3.18/batman-adv-unify-fragment-size-calculation.patch
new file mode 100644 (file)
index 0000000..7b7f59b
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Sven Eckelmann <sven@narfation.org>
+Date: Sat, 20 Dec 2014 13:48:56 +0100
+Subject: batman-adv: Unify fragment size calculation
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit 0402e444cd199389b7fe47be68a67b817e09e097 ]
+
+The fragmentation code was replaced in 610bfc6bc99bc83680d190ebc69359a05fc7f605
+("batman-adv: Receive fragmented packets and merge") by an implementation which
+can handle up to 16 fragments of a packet. The packet is prepared for the split
+in fragments by the function batadv_frag_send_packet and the actual split is
+done by batadv_frag_create.
+
+Both functions calculate the size of a fragment themselves. But their calculation
+differs because batadv_frag_send_packet also subtracts ETH_HLEN. Therefore,
+the check in batadv_frag_send_packet "can a full fragment be created?" may
+return true even when batadv_frag_create cannot create a full fragment.
+
+The function batadv_frag_create doesn't check the size of the skb before
+splitting it and therefore might try to create a larger fragment than the
+remaining buffer. This creates an integer underflow and an invalid len is given
+to skb_split.
+
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/fragmentation.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/batman-adv/fragmentation.c
++++ b/net/batman-adv/fragmentation.c
+@@ -434,7 +434,7 @@ bool batadv_frag_send_packet(struct sk_b
+        * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
+        */
+       mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
+-      max_fragment_size = (mtu - header_size - ETH_HLEN);
++      max_fragment_size = mtu - header_size;
+       max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
+       /* Don't even try to fragment, if we need more than 16 fragments */
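
A standalone sketch (made-up sizes, not kernel code) of the underflow described above: the sending side decided whether another fragment was needed with the smaller "mtu - header_size - ETH_HLEN" value, while the splitting code subtracted the larger "mtu - header_size" value, so a remaining length in between wraps around when the unsigned subtraction is done.

    /* Unsigned wrap-around produced by two inconsistent fragment sizes. */
    #include <stdio.h>

    #define ETH_HLEN 14

    int main(void)
    {
        unsigned int mtu = 1500, header_size = 20;               /* illustrative numbers */
        unsigned int check_size = mtu - header_size - ETH_HLEN;  /* 1466, old send-side value */
        unsigned int split_size = mtu - header_size;             /* 1480, value used when splitting */
        unsigned int remaining  = 1470;                          /* bytes still to be sent */

        /* Send side: "remaining > check_size" -> another fragment gets created. */
        printf("another fragment needed: %s\n", remaining > check_size ? "yes" : "no");

        /* Split side: subtracting the larger size underflows the unsigned length. */
        printf("len handed to the split: %u\n", remaining - split_size);
        return 0;
    }
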
diff --git a/queue-3.18/enic-fix-rx-skb-checksum.patch b/queue-3.18/enic-fix-rx-skb-checksum.patch
new file mode 100644 (file)
index 0000000..856fdfd
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Govindarajulu Varadarajan <_govind@gmx.com>
+Date: Thu, 18 Dec 2014 15:58:42 +0530
+Subject: enic: fix rx skb checksum
+
+From: Govindarajulu Varadarajan <_govind@gmx.com>
+
+[ Upstream commit 17e96834fd35997ca7cdfbf15413bcd5a36ad448 ]
+
+Hardware always provides the complement of the IP pseudo checksum. Stack expects
+whole packet checksum without pseudo checksum if CHECKSUM_COMPLETE is set.
+
+This causes checksum error in nf & ovs.
+
+kernel: qg-19546f09-f2: hw csum failure
+kernel: CPU: 9 PID: 0 Comm: swapper/9 Tainted: GF          O--------------   3.10.0-123.8.1.el7.x86_64 #1
+kernel: Hardware name: Cisco Systems Inc UCSB-B200-M3/UCSB-B200-M3, BIOS B200M3.2.2.3.0.080820141339 08/08/2014
+kernel: ffff881218f40000 df68243feb35e3a8 ffff881237a43ab8 ffffffff815e237b
+kernel: ffff881237a43ad0 ffffffff814cd4ca ffff8829ec71eb00 ffff881237a43af0
+kernel: ffffffff814c6232 0000000000000286 ffff8829ec71eb00 ffff881237a43b00
+kernel: Call Trace:
+kernel: <IRQ>  [<ffffffff815e237b>] dump_stack+0x19/0x1b
+kernel: [<ffffffff814cd4ca>] netdev_rx_csum_fault+0x3a/0x40
+kernel: [<ffffffff814c6232>] __skb_checksum_complete_head+0x62/0x70
+kernel: [<ffffffff814c6251>] __skb_checksum_complete+0x11/0x20
+kernel: [<ffffffff8155a20c>] nf_ip_checksum+0xcc/0x100
+kernel: [<ffffffffa049edc7>] icmp_error+0x1f7/0x35c [nf_conntrack_ipv4]
+kernel: [<ffffffff814cf419>] ? netif_rx+0xb9/0x1d0
+kernel: [<ffffffffa040eb7b>] ? internal_dev_recv+0xdb/0x130 [openvswitch]
+kernel: [<ffffffffa04c8330>] nf_conntrack_in+0xf0/0xa80 [nf_conntrack]
+kernel: [<ffffffff81509380>] ? inet_del_offload+0x40/0x40
+kernel: [<ffffffffa049e302>] ipv4_conntrack_in+0x22/0x30 [nf_conntrack_ipv4]
+kernel: [<ffffffff815005ca>] nf_iterate+0xaa/0xc0
+kernel: [<ffffffff81509380>] ? inet_del_offload+0x40/0x40
+kernel: [<ffffffff81500664>] nf_hook_slow+0x84/0x140
+kernel: [<ffffffff81509380>] ? inet_del_offload+0x40/0x40
+kernel: [<ffffffff81509dd4>] ip_rcv+0x344/0x380
+
+Hardware verifies IP & tcp/udp header checksum but does not provide payload
+checksum, so use CHECKSUM_UNNECESSARY. Set it only if it is a valid IP tcp/udp packet.
+
+Cc: Jiri Benc <jbenc@redhat.com>
+Cc: Stefan Assmann <sassmann@redhat.com>
+Reported-by: Sunil Choudhary <schoudha@redhat.com>
+Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
+Reviewed-by: Jiri Benc <jbenc@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cisco/enic/enic_main.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/cisco/enic/enic_main.c
++++ b/drivers/net/ethernet/cisco/enic/enic_main.c
+@@ -1059,10 +1059,14 @@ static void enic_rq_indicate_buf(struct
+                                    PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+               }
+-              if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc) {
+-                      skb->csum = htons(checksum);
+-                      skb->ip_summed = CHECKSUM_COMPLETE;
+-              }
++              /* Hardware does not provide whole packet checksum. It only
++               * provides pseudo checksum. Since hw validates the packet
++               * checksum but not provide us the checksum value. use
++               * CHECSUM_UNNECESSARY.
++               */
++              if ((netdev->features & NETIF_F_RXCSUM) && tcp_udp_csum_ok &&
++                  ipv4_csum_ok)
++                      skb->ip_summed = CHECKSUM_UNNECESSARY;
+               if (vlan_stripped)
+                       __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
diff --git a/queue-3.18/geneve-fix-races-between-socket-add-and-release.patch b/queue-3.18/geneve-fix-races-between-socket-add-and-release.patch
new file mode 100644 (file)
index 0000000..07cc4fa
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jesse Gross <jesse@nicira.com>
+Date: Tue, 16 Dec 2014 18:25:32 -0800
+Subject: geneve: Fix races between socket add and release.
+
+From: Jesse Gross <jesse@nicira.com>
+
+[ Upstream commit 12069401d895ff84076a50189ca842c0696b84b2 ]
+
+Currently, searching for a socket to add a reference to is not
+synchronized with deletion of sockets. This can result in use
+after free if there is another operation that is removing a
+socket at the same time. Solving this requires both holding the
+appropriate lock and checking the refcount to ensure that it
+has not already hit zero.
+
+Inspired by a related (but not exactly the same) issue in the
+VXLAN driver.
+
+Fixes: 0b5e8b8e ("net: Add Geneve tunneling protocol driver")
+CC: Andy Zhou <azhou@nicira.com>
+Signed-off-by: Jesse Gross <jesse@nicira.com>
+Acked-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/geneve.c |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/geneve.c
++++ b/net/ipv4/geneve.c
+@@ -302,6 +302,7 @@ struct geneve_sock *geneve_sock_add(stru
+                                   geneve_rcv_t *rcv, void *data,
+                                   bool no_share, bool ipv6)
+ {
++      struct geneve_net *gn = net_generic(net, geneve_net_id);
+       struct geneve_sock *gs;
+       gs = geneve_socket_create(net, port, rcv, data, ipv6);
+@@ -311,15 +312,15 @@ struct geneve_sock *geneve_sock_add(stru
+       if (no_share)   /* Return error if sharing is not allowed. */
+               return ERR_PTR(-EINVAL);
++      spin_lock(&gn->sock_lock);
+       gs = geneve_find_sock(net, port);
+-      if (gs) {
+-              if (gs->rcv == rcv)
+-                      atomic_inc(&gs->refcnt);
+-              else
++      if (gs && ((gs->rcv != rcv) ||
++                 !atomic_add_unless(&gs->refcnt, 1, 0)))
+                       gs = ERR_PTR(-EBUSY);
+-      } else {
++      spin_unlock(&gn->sock_lock);
++
++      if (!gs)
+               gs = ERR_PTR(-EINVAL);
+-      }
+       return gs;
+ }
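
The "take a reference only if the count has not already dropped to zero" idiom that closes the race, sketched standalone with C11 atomics (the kernel code above uses atomic_add_unless(&gs->refcnt, 1, 0) under gn->sock_lock; the names here are illustrative).

    /* A releaser that already dropped the last reference must not be
     * resurrected by a concurrent lookup; the lookup has to fail instead. */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool get_unless_zero(atomic_int *refcnt)
    {
        int old = atomic_load(refcnt);

        while (old != 0) {
            /* Increment only if the count is still the non-zero value we saw. */
            if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
                return true;
        }
        return false;   /* object is already on its way to destruction */
    }

    int main(void)
    {
        atomic_int live = 1, dying = 0;

        printf("live socket:  got ref = %d\n", get_unless_zero(&live));   /* 1 */
        printf("dying socket: got ref = %d\n", get_unless_zero(&dying));  /* 0 */
        return 0;
    }
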
diff --git a/queue-3.18/geneve-remove-socket-and-offload-handlers-at-destruction.patch b/queue-3.18/geneve-remove-socket-and-offload-handlers-at-destruction.patch
new file mode 100644 (file)
index 0000000..91a384e
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jesse Gross <jesse@nicira.com>
+Date: Tue, 16 Dec 2014 18:25:31 -0800
+Subject: geneve: Remove socket and offload handlers at destruction.
+
+From: Jesse Gross <jesse@nicira.com>
+
+[ Upstream commit 7ed767f73192d6daf673c6d885cd02d5f280ac1f ]
+
+Sockets aren't currently removed from the global list when
+they are destroyed. In addition, offload handlers need to be cleaned
+up as well.
+
+Fixes: 0b5e8b8e ("net: Add Geneve tunneling protocol driver")
+CC: Andy Zhou <azhou@nicira.com>
+Signed-off-by: Jesse Gross <jesse@nicira.com>
+Acked-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/geneve.c |   17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/net/ipv4/geneve.c
++++ b/net/ipv4/geneve.c
+@@ -165,6 +165,15 @@ static void geneve_notify_add_rx_port(st
+       }
+ }
++static void geneve_notify_del_rx_port(struct geneve_sock *gs)
++{
++      struct sock *sk = gs->sock->sk;
++      sa_family_t sa_family = sk->sk_family;
++
++      if (sa_family == AF_INET)
++              udp_del_offload(&gs->udp_offloads);
++}
++
+ /* Callback from net/ipv4/udp.c to receive packets */
+ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+ {
+@@ -318,9 +327,17 @@ EXPORT_SYMBOL_GPL(geneve_sock_add);
+ void geneve_sock_release(struct geneve_sock *gs)
+ {
++      struct net *net = sock_net(gs->sock->sk);
++      struct geneve_net *gn = net_generic(net, geneve_net_id);
++
+       if (!atomic_dec_and_test(&gs->refcnt))
+               return;
++      spin_lock(&gn->sock_lock);
++      hlist_del_rcu(&gs->hlist);
++      geneve_notify_del_rx_port(gs);
++      spin_unlock(&gn->sock_lock);
++
+       queue_work(geneve_wq, &gs->del_work);
+ }
+ EXPORT_SYMBOL_GPL(geneve_sock_release);
diff --git a/queue-3.18/gre-fix-the-inner-mac-header-in-nbma-tunnel-xmit-path.patch b/queue-3.18/gre-fix-the-inner-mac-header-in-nbma-tunnel-xmit-path.patch
new file mode 100644 (file)
index 0000000..87062f0
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Mon, 15 Dec 2014 09:24:13 +0200
+Subject: gre: fix the inner mac header in nbma tunnel xmit path
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+
+[ Upstream commit 8a0033a947403569caeca45fa5e6f7ba60d51974 ]
+
+The NBMA GRE tunnels temporarily push a GRE header that contains the
+per-packet NBMA destination on the skb via header ops early in the xmit
+path. It is then later pulled before the real GRE header is constructed.
+
+The inner mac was thus set differently in nbma case: the GRE header
+has been pushed by neighbor layer, and mac header points to beginning
+of the temporary gre header (set by dev_queue_xmit).
+
+Now that the offloads expect mac header to point to the gre payload,
+fix the xmit path to:
+ - pull first the temporary gre header away
+ - and reset mac header to point to gre payload
+
+This fixes tso to work again with nbma tunnels.
+
+Fixes: 14051f0452a2 ("gre: Use inner mac length when computing tunnel length")
+Signed-off-by: Timo Teräs <timo.teras@iki.fi>
+Cc: Tom Herbert <therbert@google.com>
+Cc: Alexander Duyck <alexander.h.duyck@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_gre.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -252,10 +252,6 @@ static netdev_tx_t ipgre_xmit(struct sk_
+       struct ip_tunnel *tunnel = netdev_priv(dev);
+       const struct iphdr *tnl_params;
+-      skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
+-      if (IS_ERR(skb))
+-              goto out;
+-
+       if (dev->header_ops) {
+               /* Need space for new headers */
+               if (skb_cow_head(skb, dev->needed_headroom -
+@@ -268,6 +264,7 @@ static netdev_tx_t ipgre_xmit(struct sk_
+                * to gre header.
+                */
+               skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
++              skb_reset_mac_header(skb);
+       } else {
+               if (skb_cow_head(skb, dev->needed_headroom))
+                       goto free_skb;
+@@ -275,6 +272,10 @@ static netdev_tx_t ipgre_xmit(struct sk_
+               tnl_params = &tunnel->parms.iph;
+       }
++      skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
++      if (IS_ERR(skb))
++              goto out;
++
+       __gre_xmit(skb, dev, tnl_params, skb->protocol);
+       return NETDEV_TX_OK;
diff --git a/queue-3.18/in6-fix-conflict-with-glibc.patch b/queue-3.18/in6-fix-conflict-with-glibc.patch
new file mode 100644 (file)
index 0000000..711a895
--- /dev/null
@@ -0,0 +1,70 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: stephen hemminger <stephen@networkplumber.org>
+Date: Sat, 20 Dec 2014 12:15:49 -0800
+Subject: in6: fix conflict with glibc
+
+From: stephen hemminger <stephen@networkplumber.org>
+
+[ Upstream commit 6d08acd2d32e3e877579315dc3202d7a5f336d98 ]
+
+Resolve conflicts between glibc definition of IPV6 socket options
+and those defined in Linux headers. Looks like earlier efforts to
+solve this did not cover all the definitions.
+
+It resolves warnings during iproute2 build.
+Please consider for stable as well.
+
+Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/in6.h         |    3 ++-
+ include/uapi/linux/libc-compat.h |    3 +++
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/include/uapi/linux/in6.h
++++ b/include/uapi/linux/in6.h
+@@ -149,7 +149,7 @@ struct in6_flowlabel_req {
+ /*
+  *    IPV6 socket options
+  */
+-
++#if __UAPI_DEF_IPV6_OPTIONS
+ #define IPV6_ADDRFORM         1
+ #define IPV6_2292PKTINFO      2
+ #define IPV6_2292HOPOPTS      3
+@@ -196,6 +196,7 @@ struct in6_flowlabel_req {
+ #define IPV6_IPSEC_POLICY     34
+ #define IPV6_XFRM_POLICY      35
++#endif
+ /*
+  * Multicast:
+--- a/include/uapi/linux/libc-compat.h
++++ b/include/uapi/linux/libc-compat.h
+@@ -69,6 +69,7 @@
+ #define __UAPI_DEF_SOCKADDR_IN6               0
+ #define __UAPI_DEF_IPV6_MREQ          0
+ #define __UAPI_DEF_IPPROTO_V6         0
++#define __UAPI_DEF_IPV6_OPTIONS               0
+ #else
+@@ -82,6 +83,7 @@
+ #define __UAPI_DEF_SOCKADDR_IN6               1
+ #define __UAPI_DEF_IPV6_MREQ          1
+ #define __UAPI_DEF_IPPROTO_V6         1
++#define __UAPI_DEF_IPV6_OPTIONS               1
+ #endif /* _NETINET_IN_H */
+@@ -103,6 +105,7 @@
+ #define __UAPI_DEF_SOCKADDR_IN6               1
+ #define __UAPI_DEF_IPV6_MREQ          1
+ #define __UAPI_DEF_IPPROTO_V6         1
++#define __UAPI_DEF_IPV6_OPTIONS               1
+ /* Definitions for xattr.h */
+ #define __UAPI_DEF_XATTR              1
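
For illustration, a minimal standalone version (same macro and constant names as in the patch, but trimmed) of the libc-compat pattern being extended here: the UAPI header emits its IPV6_* socket-option constants only when the coordination macro says that libc has not already provided them.

    /* libc-compat.h sets __UAPI_DEF_IPV6_OPTIONS to 0 when <netinet/in.h>
     * was included first, and to 1 otherwise; in6.h then keys off it. */
    #include <stdio.h>

    #ifndef __UAPI_DEF_IPV6_OPTIONS
    #define __UAPI_DEF_IPV6_OPTIONS 1
    #endif

    #if __UAPI_DEF_IPV6_OPTIONS
    #define IPV6_ADDRFORM    1
    #define IPV6_2292PKTINFO 2
    /* ... the remaining IPV6_* socket options ... */
    #endif

    int main(void)
    {
    #if __UAPI_DEF_IPV6_OPTIONS
        printf("kernel header provides IPV6_ADDRFORM = %d\n", IPV6_ADDRFORM);
    #else
        printf("definitions suppressed; libc already provides them\n");
    #endif
        return 0;
    }
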
diff --git a/queue-3.18/net-core-handle-csum-for-checksum_complete-vxlan-forwarding.patch b/queue-3.18/net-core-handle-csum-for-checksum_complete-vxlan-forwarding.patch
new file mode 100644 (file)
index 0000000..62fb06d
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jay Vosburgh <jay.vosburgh@canonical.com>
+Date: Fri, 19 Dec 2014 15:32:00 -0800
+Subject: net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding
+
+From: Jay Vosburgh <jay.vosburgh@canonical.com>
+
+[ Upstream commit 2c26d34bbcc0b3f30385d5587aa232289e2eed8e ]
+
+When using VXLAN tunnels and a sky2 device, I have experienced
+checksum failures of the following type:
+
+[ 4297.761899] eth0: hw csum failure
+[...]
+[ 4297.765223] Call Trace:
+[ 4297.765224]  <IRQ>  [<ffffffff8172f026>] dump_stack+0x46/0x58
+[ 4297.765235]  [<ffffffff8162ba52>] netdev_rx_csum_fault+0x42/0x50
+[ 4297.765238]  [<ffffffff8161c1a0>] ? skb_push+0x40/0x40
+[ 4297.765240]  [<ffffffff8162325c>] __skb_checksum_complete+0xbc/0xd0
+[ 4297.765243]  [<ffffffff8168c602>] tcp_v4_rcv+0x2e2/0x950
+[ 4297.765246]  [<ffffffff81666ca0>] ? ip_rcv_finish+0x360/0x360
+
+       These are reliably reproduced in a network topology of:
+
+container:eth0 == host(OVS VXLAN on VLAN) == bond0 == eth0 (sky2) -> switch
+
+       When VXLAN encapsulated traffic is received from a similarly
+configured peer, the above warning is generated in the receive
+processing of the encapsulated packet.  Note that the warning is
+associated with the container eth0.
+
+        The skbs from sky2 have ip_summed set to CHECKSUM_COMPLETE, and
+because the packet is an encapsulated Ethernet frame, the checksum
+generated by the hardware includes the inner protocol and Ethernet
+headers.
+
+       The receive code is careful to update the skb->csum, except in
+__dev_forward_skb, as called by dev_forward_skb.  __dev_forward_skb
+calls eth_type_trans, which in turn calls skb_pull_inline(skb, ETH_HLEN)
+to skip over the Ethernet header, but does not update skb->csum when
+doing so.
+
+       This patch resolves the problem by adding a call to
+skb_postpull_rcsum to update the skb->csum after the call to
+eth_type_trans.
+
+Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1697,6 +1697,7 @@ int __dev_forward_skb(struct net_device
+       skb_scrub_packet(skb, true);
+       skb->protocol = eth_type_trans(skb, dev);
++      skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+       return 0;
+ }
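
Why the one-line fix works, shown with a standalone ones-complement checksum in plain C (illustrative only; the kernel's skb_postpull_rcsum() does the equivalent csum_sub() on skb->csum): after the Ethernet header is pulled, subtracting that header's partial sum from the cached full-frame sum leaves exactly the sum over the remaining bytes.

    /* Internet-checksum style partial sums over a fabricated 60-byte frame. */
    #include <stdint.h>
    #include <stdio.h>

    #define ETH_HLEN 14

    static uint32_t csum_partial(const uint8_t *buf, unsigned int len, uint32_t sum)
    {
        while (len > 1) {                        /* sum 16-bit big-endian words */
            sum += (uint32_t)buf[0] << 8 | buf[1];
            buf += 2;
            len -= 2;
        }
        if (len)
            sum += (uint32_t)buf[0] << 8;
        while (sum >> 16)                        /* end-around carry fold */
            sum = (sum & 0xffff) + (sum >> 16);
        return sum;
    }

    static uint32_t csum_sub(uint32_t csum, uint32_t addend)   /* ones-complement subtract */
    {
        uint32_t res = csum + (~addend & 0xffff);
        while (res >> 16)
            res = (res & 0xffff) + (res >> 16);
        return res;
    }

    int main(void)
    {
        uint8_t frame[60];
        unsigned int i;

        for (i = 0; i < sizeof(frame); i++)
            frame[i] = (uint8_t)(i * 7 + 3);     /* arbitrary contents */

        uint32_t full    = csum_partial(frame, sizeof(frame), 0);
        uint32_t hdr     = csum_partial(frame, ETH_HLEN, 0);
        uint32_t payload = csum_partial(frame + ETH_HLEN, sizeof(frame) - ETH_HLEN, 0);

        printf("sum over payload only   = 0x%04x\n", payload);
        printf("full sum minus header   = 0x%04x\n", csum_sub(full, hdr));  /* identical */
        return 0;
    }
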
diff --git a/queue-3.18/net-drop-the-packet-when-fails-to-do-software-segmentation-or-header-check.patch b/queue-3.18/net-drop-the-packet-when-fails-to-do-software-segmentation-or-header-check.patch
new file mode 100644 (file)
index 0000000..34befa5
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jason Wang <jasowang@redhat.com>
+Date: Fri, 19 Dec 2014 11:09:13 +0800
+Subject: net: drop the packet when fails to do software segmentation or header check
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit af6dabc9c70ae3f307685b1f32f52d60b1bf0527 ]
+
+Commit cecda693a969816bac5e470e1d9c9c0ef5567bca ("net: keep original skb
+which only needs header checking during software GSO") keeps the original
+skb for packets that only need a header check, but it doesn't drop the
+packet if software segmentation or the header check fails.
+
+Fixes cecda693a9 ("net: keep original skb which only needs header checking during software GSO")
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2680,7 +2680,7 @@ static struct sk_buff *validate_xmit_skb
+               segs = skb_gso_segment(skb, features);
+               if (IS_ERR(segs)) {
+-                      segs = NULL;
++                      goto out_kfree_skb;
+               } else if (segs) {
+                       consume_skb(skb);
+                       skb = segs;
diff --git a/queue-3.18/net-fix-stacked-vlan-offload-features-computation.patch b/queue-3.18/net-fix-stacked-vlan-offload-features-computation.patch
new file mode 100644 (file)
index 0000000..e9b30e2
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Mon, 22 Dec 2014 19:04:14 +0900
+Subject: net: Fix stacked vlan offload features computation
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 796f2da81bead71ffc91ef70912cd8d1827bf756 ]
+
+When vlan tags are stacked, it is very likely that the outer tag is stored
+in skb->vlan_tci and skb->protocol shows the inner tag's vlan_proto.
+Currently netif_skb_features() first looks at skb->protocol even if there
+is the outer tag in vlan_tci, thus it incorrectly retrieves the protocol
+encapsulated by the inner vlan instead of the inner vlan protocol.
+This allows GSO packets to be passed to HW and they end up being
+corrupted.
+
+Fixes: 58e998c6d239 ("offloading: Force software GSO for multiple vlan tags.")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2573,11 +2573,14 @@ netdev_features_t netif_skb_features(str
+       if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
+               features &= ~NETIF_F_GSO_MASK;
+-      if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
+-              struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+-              protocol = veh->h_vlan_encapsulated_proto;
+-      } else if (!vlan_tx_tag_present(skb)) {
+-              return harmonize_features(skb, features);
++      if (!vlan_tx_tag_present(skb)) {
++              if (unlikely(protocol == htons(ETH_P_8021Q) ||
++                           protocol == htons(ETH_P_8021AD))) {
++                      struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
++                      protocol = veh->h_vlan_encapsulated_proto;
++              } else {
++                      return harmonize_features(skb, features);
++              }
+       }
+       features = netdev_intersect_features(features,
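
A standalone illustration (plain C, not kernel code) of the layering the fix has to respect: with stacked (QinQ) VLANs, the first EtherType is the outer tag; once that tag has been moved into metadata (skb->vlan_tci), the protocol left in the frame -- and in skb->protocol -- is the inner VLAN protocol, not the protocol it encapsulates.

    /* Walk a QinQ Ethernet header: outer 0x88A8 S-tag, inner 0x8100 C-tag, IPv4. */
    #include <stdint.h>
    #include <stdio.h>

    static unsigned int get_be16(const uint8_t *p)
    {
        return (unsigned int)p[0] << 8 | p[1];
    }

    int main(void)
    {
        const uint8_t frame[] = {
            0x02,0,0,0,0,0x01,  0x02,0,0,0,0,0x02,  /* dst MAC, src MAC */
            0x88,0xA8, 0x00,0x64,                   /* outer S-tag, VID 100 */
            0x81,0x00, 0x00,0xC8,                   /* inner C-tag, VID 200 */
            0x08,0x00,                              /* encapsulated protocol: IPv4 */
        };
        const uint8_t *p = frame + 12;
        unsigned int proto = get_be16(p);

        while (proto == 0x8100 || proto == 0x88A8) {    /* peel every VLAN layer */
            printf("vlan proto 0x%04x, vid %u\n", proto, get_be16(p + 2) & 0x0fff);
            p += 4;
            proto = get_be16(p);
        }
        printf("encapsulated protocol 0x%04x\n", proto);
        return 0;
    }
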
diff --git a/queue-3.18/net-generalize-ndo_gso_check-to-ndo_features_check.patch b/queue-3.18/net-generalize-ndo_gso_check-to-ndo_features_check.patch
new file mode 100644 (file)
index 0000000..9941c5c
--- /dev/null
@@ -0,0 +1,285 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jesse Gross <jesse@nicira.com>
+Date: Tue, 23 Dec 2014 22:37:26 -0800
+Subject: net: Generalize ndo_gso_check to ndo_features_check
+
+From: Jesse Gross <jesse@nicira.com>
+
+[ Upstream commit 5f35227ea34bb616c436d9da47fc325866c428f3 ]
+
+GSO isn't the only offload feature with restrictions that
+potentially can't be expressed with the current features mechanism.
+Checksum is another although it's a general issue that could in
+theory apply to anything. Even if it may be possible to
+implement these restrictions in other ways, it can result in
+duplicate code or inefficient per-packet behavior.
+
+This generalizes ndo_gso_check so that drivers can remove any
+features that don't make sense for a given packet, similar to
+netif_skb_features(). It also converts existing driver
+restrictions to the new format, completing the work that was
+done to support tunnel protocols since the issues apply to
+checksums as well.
+
+By actually removing features from the set that are used to do
+offloading, it solves another problem with the existing
+interface. In these cases, GSO would run with the original set
+of features and not do anything because it appears that
+segmentation is not required.
+
+CC: Tom Herbert <therbert@google.com>
+CC: Joe Stringer <joestringer@nicira.com>
+CC: Eric Dumazet <edumazet@google.com>
+CC: Hayes Wang <hayeswang@realtek.com>
+Signed-off-by: Jesse Gross <jesse@nicira.com>
+Acked-by:  Tom Herbert <therbert@google.com>
+Fixes: 04ffcb255f22 ("net: Add ndo_gso_check")
+Tested-by: Hayes Wang <hayeswang@realtek.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/emulex/benet/be_main.c      |    8 ++++--
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |   10 ++++----
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |    8 ++++--
+ include/linux/netdevice.h                        |   20 +++++++++-------
+ include/net/vxlan.h                              |   28 +++++++++++++++++++----
+ net/core/dev.c                                   |   23 +++++++++++-------
+ 6 files changed, 65 insertions(+), 32 deletions(-)
+
+--- a/drivers/net/ethernet/emulex/benet/be_main.c
++++ b/drivers/net/ethernet/emulex/benet/be_main.c
+@@ -4427,9 +4427,11 @@ static void be_del_vxlan_port(struct net
+                be16_to_cpu(port));
+ }
+-static bool be_gso_check(struct sk_buff *skb, struct net_device *dev)
++static netdev_features_t be_features_check(struct sk_buff *skb,
++                                         struct net_device *dev,
++                                         netdev_features_t features)
+ {
+-      return vxlan_gso_check(skb);
++      return vxlan_features_check(skb, features);
+ }
+ #endif
+@@ -4460,7 +4462,7 @@ static const struct net_device_ops be_ne
+ #ifdef CONFIG_BE2NET_VXLAN
+       .ndo_add_vxlan_port     = be_add_vxlan_port,
+       .ndo_del_vxlan_port     = be_del_vxlan_port,
+-      .ndo_gso_check          = be_gso_check,
++      .ndo_features_check     = be_features_check,
+ #endif
+ };
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -2363,9 +2363,11 @@ static void mlx4_en_del_vxlan_port(struc
+       queue_work(priv->mdev->workqueue, &priv->vxlan_del_task);
+ }
+-static bool mlx4_en_gso_check(struct sk_buff *skb, struct net_device *dev)
++static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
++                                              struct net_device *dev,
++                                              netdev_features_t features)
+ {
+-      return vxlan_gso_check(skb);
++      return vxlan_features_check(skb, features);
+ }
+ #endif
+@@ -2398,7 +2400,7 @@ static const struct net_device_ops mlx4_
+ #ifdef CONFIG_MLX4_EN_VXLAN
+       .ndo_add_vxlan_port     = mlx4_en_add_vxlan_port,
+       .ndo_del_vxlan_port     = mlx4_en_del_vxlan_port,
+-      .ndo_gso_check          = mlx4_en_gso_check,
++      .ndo_features_check     = mlx4_en_features_check,
+ #endif
+ };
+@@ -2432,7 +2434,7 @@ static const struct net_device_ops mlx4_
+ #ifdef CONFIG_MLX4_EN_VXLAN
+       .ndo_add_vxlan_port     = mlx4_en_add_vxlan_port,
+       .ndo_del_vxlan_port     = mlx4_en_del_vxlan_port,
+-      .ndo_gso_check          = mlx4_en_gso_check,
++      .ndo_features_check     = mlx4_en_features_check,
+ #endif
+ };
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+@@ -504,9 +504,11 @@ static void qlcnic_del_vxlan_port(struct
+       adapter->flags |= QLCNIC_DEL_VXLAN_PORT;
+ }
+-static bool qlcnic_gso_check(struct sk_buff *skb, struct net_device *dev)
++static netdev_features_t qlcnic_features_check(struct sk_buff *skb,
++                                             struct net_device *dev,
++                                             netdev_features_t features)
+ {
+-      return vxlan_gso_check(skb);
++      return vxlan_features_check(skb, features);
+ }
+ #endif
+@@ -531,7 +533,7 @@ static const struct net_device_ops qlcni
+ #ifdef CONFIG_QLCNIC_VXLAN
+       .ndo_add_vxlan_port     = qlcnic_add_vxlan_port,
+       .ndo_del_vxlan_port     = qlcnic_del_vxlan_port,
+-      .ndo_gso_check          = qlcnic_gso_check,
++      .ndo_features_check     = qlcnic_features_check,
+ #endif
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller = qlcnic_poll_controller,
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -998,12 +998,15 @@ typedef u16 (*select_queue_fallback_t)(s
+  *    Callback to use for xmit over the accelerated station. This
+  *    is used in place of ndo_start_xmit on accelerated net
+  *    devices.
+- * bool       (*ndo_gso_check) (struct sk_buff *skb,
+- *                      struct net_device *dev);
++ * netdev_features_t (*ndo_features_check) (struct sk_buff *skb,
++ *                                        struct net_device *dev
++ *                                        netdev_features_t features);
+  *    Called by core transmit path to determine if device is capable of
+- *    performing GSO on a packet. The device returns true if it is
+- *    able to GSO the packet, false otherwise. If the return value is
+- *    false the stack will do software GSO.
++ *    performing offload operations on a given packet. This is to give
++ *    the device an opportunity to implement any restrictions that cannot
++ *    be otherwise expressed by feature flags. The check is called with
++ *    the set of features that the stack has calculated and it returns
++ *    those the driver believes to be appropriate.
+  */
+ struct net_device_ops {
+       int                     (*ndo_init)(struct net_device *dev);
+@@ -1153,8 +1156,9 @@ struct net_device_ops {
+                                                       struct net_device *dev,
+                                                       void *priv);
+       int                     (*ndo_get_lock_subclass)(struct net_device *dev);
+-      bool                    (*ndo_gso_check) (struct sk_buff *skb,
+-                                                struct net_device *dev);
++      netdev_features_t       (*ndo_features_check) (struct sk_buff *skb,
++                                                     struct net_device *dev,
++                                                     netdev_features_t features);
+ };
+ /**
+@@ -3584,8 +3588,6 @@ static inline bool netif_needs_gso(struc
+                                  netdev_features_t features)
+ {
+       return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
+-              (dev->netdev_ops->ndo_gso_check &&
+-               !dev->netdev_ops->ndo_gso_check(skb, dev)) ||
+               unlikely((skb->ip_summed != CHECKSUM_PARTIAL) &&
+                        (skb->ip_summed != CHECKSUM_UNNECESSARY)));
+ }
+--- a/include/net/vxlan.h
++++ b/include/net/vxlan.h
+@@ -1,6 +1,9 @@
+ #ifndef __NET_VXLAN_H
+ #define __NET_VXLAN_H 1
++#include <linux/ip.h>
++#include <linux/ipv6.h>
++#include <linux/if_vlan.h>
+ #include <linux/skbuff.h>
+ #include <linux/netdevice.h>
+ #include <linux/udp.h>
+@@ -51,16 +54,33 @@ int vxlan_xmit_skb(struct vxlan_sock *vs
+                  __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
+                  __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
+-static inline bool vxlan_gso_check(struct sk_buff *skb)
++static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
++                                                   netdev_features_t features)
+ {
+-      if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) &&
++      u8 l4_hdr = 0;
++
++      if (!skb->encapsulation)
++              return features;
++
++      switch (vlan_get_protocol(skb)) {
++      case htons(ETH_P_IP):
++              l4_hdr = ip_hdr(skb)->protocol;
++              break;
++      case htons(ETH_P_IPV6):
++              l4_hdr = ipv6_hdr(skb)->nexthdr;
++              break;
++      default:
++              return features;;
++      }
++
++      if ((l4_hdr == IPPROTO_UDP) &&
+           (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+            skb->inner_protocol != htons(ETH_P_TEB) ||
+            (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
+             sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
+-              return false;
++              return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+-      return true;
++      return features;
+ }
+ /* IP header + UDP + VXLAN + Ethernet header */
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2566,7 +2566,7 @@ static netdev_features_t harmonize_featu
+ netdev_features_t netif_skb_features(struct sk_buff *skb)
+ {
+-      const struct net_device *dev = skb->dev;
++      struct net_device *dev = skb->dev;
+       netdev_features_t features = dev->features;
+       u16 gso_segs = skb_shinfo(skb)->gso_segs;
+       __be16 protocol = skb->protocol;
+@@ -2574,13 +2574,20 @@ netdev_features_t netif_skb_features(str
+       if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
+               features &= ~NETIF_F_GSO_MASK;
++      /* If encapsulation offload request, verify we are testing
++       * hardware encapsulation features instead of standard
++       * features for the netdev
++       */
++      if (skb->encapsulation)
++              features &= dev->hw_enc_features;
++
+       if (!vlan_tx_tag_present(skb)) {
+               if (unlikely(protocol == htons(ETH_P_8021Q) ||
+                            protocol == htons(ETH_P_8021AD))) {
+                       struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+                       protocol = veh->h_vlan_encapsulated_proto;
+               } else {
+-                      return harmonize_features(skb, features);
++                      goto finalize;
+               }
+       }
+@@ -2598,6 +2605,11 @@ netdev_features_t netif_skb_features(str
+                                                    NETIF_F_HW_VLAN_CTAG_TX |
+                                                    NETIF_F_HW_VLAN_STAG_TX);
++finalize:
++      if (dev->netdev_ops->ndo_features_check)
++              features &= dev->netdev_ops->ndo_features_check(skb, dev,
++                                                              features);
++
+       return harmonize_features(skb, features);
+ }
+ EXPORT_SYMBOL(netif_skb_features);
+@@ -2672,13 +2684,6 @@ static struct sk_buff *validate_xmit_skb
+       if (unlikely(!skb))
+               goto out_null;
+-      /* If encapsulation offload request, verify we are testing
+-       * hardware encapsulation features instead of standard
+-       * features for the netdev
+-       */
+-      if (skb->encapsulation)
+-              features &= dev->hw_enc_features;
+-
+       if (netif_needs_gso(dev, skb, features)) {
+               struct sk_buff *segs;
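
A standalone sketch (illustrative names and feature bits, not the kernel API) of the interface change above: instead of a hook that answers "can this packet be GSOed?" with a boolean, the per-packet hook now receives the computed feature mask and hands back the subset it can really honour, so GSO and checksum offload are both disabled for packets the hardware cannot handle.

    /* A driver-side check that trims the offered feature bits per packet. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t features_t;
    #define F_GSO   (1u << 0)
    #define F_CSUM  (1u << 1)
    #define F_OTHER (1u << 2)

    struct pkt { bool odd_encapsulation; };

    static features_t example_features_check(const struct pkt *p, features_t features)
    {
        if (p->odd_encapsulation)                  /* e.g. a tunnel format the NIC can't parse */
            features &= ~(F_GSO | F_CSUM);         /* fall back to software for both */
        return features;
    }

    int main(void)
    {
        struct pkt plain = { .odd_encapsulation = false };
        struct pkt weird = { .odd_encapsulation = true };
        features_t wanted = F_GSO | F_CSUM | F_OTHER;

        printf("plain packet features: 0x%x\n", example_features_check(&plain, wanted)); /* 0x7 */
        printf("weird packet features: 0x%x\n", example_features_check(&weird, wanted)); /* 0x4 */
        return 0;
    }
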
diff --git a/queue-3.18/net-mlx4-cache-line-cqe-eqe-stride-fixes.patch b/queue-3.18/net-mlx4-cache-line-cqe-eqe-stride-fixes.patch
new file mode 100644 (file)
index 0000000..3163e15
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Ido Shamay <idos@mellanox.com>
+Date: Tue, 16 Dec 2014 13:28:54 +0200
+Subject: net/mlx4: Cache line CQE/EQE stride fixes
+
+From: Ido Shamay <idos@mellanox.com>
+
+[ Upstream commit c3f2511feac088030055012cc8f64ebd84c87dbc ]
+
+This commit contains 2 fixes for the 128B CQE/EQE stride feature.
+Wei found that the mlx4_QUERY_HCA function marked the wrong capability
+in flags (64B CQE/EQE) when the CQE/EQE stride feature was enabled.
+Also added a small fix in the initial CQE ownership bit assignment, when
+the CQE size is not the default 32B.
+
+Fixes: 77507aa24 (net/mlx4: Enable CQE/EQE stride support)
+Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
+Signed-off-by: Ido Shamay <idos@mellanox.com>
+Signed-off-by: Amir Vadai <amirv@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c |   11 +++++++++--
+ drivers/net/ethernet/mellanox/mlx4/fw.c        |    4 ++--
+ 2 files changed, 11 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -1569,8 +1569,15 @@ int mlx4_en_start_port(struct net_device
+                       mlx4_en_free_affinity_hint(priv, i);
+                       goto cq_err;
+               }
+-              for (j = 0; j < cq->size; j++)
+-                      cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK;
++
++              for (j = 0; j < cq->size; j++) {
++                      struct mlx4_cqe *cqe = NULL;
++
++                      cqe = mlx4_en_get_cqe(cq->buf, j, priv->cqe_size) +
++                            priv->cqe_factor;
++                      cqe->owner_sr_opcode = MLX4_CQE_OWNER_MASK;
++              }
++
+               err = mlx4_en_set_cq_moder(priv, cq);
+               if (err) {
+                       en_err(priv, "Failed setting cq moderation parameters\n");
+--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
++++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
+@@ -1647,8 +1647,8 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+       /* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
+       MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+       if (byte_field) {
+-              param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+-              param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
++              param->dev_cap_enabled |= MLX4_DEV_CAP_EQE_STRIDE_ENABLED;
++              param->dev_cap_enabled |= MLX4_DEV_CAP_CQE_STRIDE_ENABLED;
+               param->cqe_size = 1 << ((byte_field &
+                                        MLX4_CQE_SIZE_MASK_STRIDE) + 5);
+               param->eqe_size = 1 << (((byte_field &
diff --git a/queue-3.18/net-mlx4_core-correcly-update-the-mtt-s-offset-in-the-mr-re-reg-flow.patch b/queue-3.18/net-mlx4_core-correcly-update-the-mtt-s-offset-in-the-mr-re-reg-flow.patch
new file mode 100644 (file)
index 0000000..1726e8e
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Maor Gottlieb <maorg@mellanox.com>
+Date: Tue, 30 Dec 2014 11:59:49 +0200
+Subject: net/mlx4_core: Correcly update the mtt's offset in the MR re-reg flow
+
+From: Maor Gottlieb <maorg@mellanox.com>
+
+[ Upstream commit a51e0df4c1e06afd7aba84496c14238e6b363caa ]
+
+Previously, mlx4_mt_rereg_write filled the MPT's entity_size with the
+old MTT's page shift, which could result in using an incorrect offset.
+Fix the initialization to be after we calculate the new MTT offset.
+
+In addition, assign mtt order to -1 after calling mlx4_mtt_cleanup. This
+is necessary in order to mark the MTT as invalid and avoid freeing it later.
+
+Fixes: e630664 ('mlx4_core: Add helper functions to support MR re-registration')
+Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
+Signed-off-by: Matan Barak <matanb@mellanox.com>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/mr.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
++++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
+@@ -590,6 +590,7 @@ EXPORT_SYMBOL_GPL(mlx4_mr_free);
+ void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
+ {
+       mlx4_mtt_cleanup(dev, &mr->mtt);
++      mr->mtt.order = -1;
+ }
+ EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);
+@@ -599,14 +600,14 @@ int mlx4_mr_rereg_mem_write(struct mlx4_
+ {
+       int err;
+-      mpt_entry->start       = cpu_to_be64(iova);
+-      mpt_entry->length      = cpu_to_be64(size);
+-      mpt_entry->entity_size = cpu_to_be32(page_shift);
+-
+       err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+       if (err)
+               return err;
++      mpt_entry->start       = cpu_to_be64(mr->iova);
++      mpt_entry->length      = cpu_to_be64(mr->size);
++      mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
++
+       mpt_entry->pd_flags &= cpu_to_be32(MLX4_MPT_PD_MASK |
+                                          MLX4_MPT_PD_FLAG_EN_INV);
+       mpt_entry->flags    &= cpu_to_be32(MLX4_MPT_FLAG_FREE |
diff --git a/queue-3.18/net-mlx4_en-doorbell-is-byteswapped-in-little-endian-archs.patch b/queue-3.18/net-mlx4_en-doorbell-is-byteswapped-in-little-endian-archs.patch
new file mode 100644 (file)
index 0000000..ab648a4
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Amir Vadai <amirv@mellanox.com>
+Date: Mon, 22 Dec 2014 10:21:57 +0200
+Subject: net/mlx4_en: Doorbell is byteswapped in Little Endian archs
+
+From: Amir Vadai <amirv@mellanox.com>
+
+[ Upstream commit 492f5add4be84652bbe13da8a250d60c6856a5c5 ]
+
+iowrite32() will byteswap it's argument on big endian archs.
+iowrite32be() will byteswap on little endian archs.
+Since we don't want to do this unnecessary byteswap on the fast path,
+doorbell is stored in the NIC's native endianness. Using the right
+iowrite() according to the arch endianness.
+
+CC: Wei Yang <weiyang@linux.vnet.ibm.com>
+CC: David Laight <david.laight@aculab.com>
+Fixes: 6a4e812 ("net/mlx4_en: Avoid calling bswap in tx fast path")
+Signed-off-by: Amir Vadai <amirv@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_tx.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+@@ -954,7 +954,17 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff
+               tx_desc->ctrl.owner_opcode = op_own;
+               if (send_doorbell) {
+                       wmb();
+-                      iowrite32(ring->doorbell_qpn,
++                      /* Since there is no iowrite*_native() that writes the
++                       * value as is, without byteswapping - using the one
++                       * the doesn't do byteswapping in the relevant arch
++                       * endianness.
++                       */
++#if defined(__LITTLE_ENDIAN)
++                      iowrite32(
++#else
++                      iowrite32be(
++#endif
++                                ring->doorbell_qpn,
+                                 ring->bf.uar->map + MLX4_SEND_DOORBELL);
+               } else {
+                       ring->xmit_more++;
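
A standalone illustration (plain C using the htobe32() helper from <endian.h>; not kernel code) of why the arch-dependent iowrite is chosen: the doorbell value is cached already in the NIC's byte order, so the driver must use whichever iowrite variant performs a plain, swap-free store on the running architecture -- an extra swap would scramble the bytes that reach the device.

    /* Model the MMIO doorbell as four raw bytes and store a pre-swapped value. */
    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint8_t mmio_reg[4];                /* stand-in for the doorbell register */

    static void mmio_write32_raw(uint32_t v)   /* a plain store, no byteswapping */
    {
        memcpy(mmio_reg, &v, sizeof(v));
    }

    int main(void)
    {
        uint32_t qpn = 0x00123456;              /* value in CPU byte order */
        uint32_t doorbell = htobe32(qpn);       /* cached in device (big-endian) order */

        /* The device must see exactly 00 12 34 56 regardless of host endianness,
         * which is what the swap-free store of the pre-swapped value achieves. */
        mmio_write32_raw(doorbell);
        printf("bytes written: %02x %02x %02x %02x\n",
               mmio_reg[0], mmio_reg[1], mmio_reg[2], mmio_reg[3]);
        return 0;
    }
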
diff --git a/queue-3.18/net-reset-secmark-when-scrubbing-packet.patch b/queue-3.18/net-reset-secmark-when-scrubbing-packet.patch
new file mode 100644 (file)
index 0000000..fdf511c
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Thomas Graf <tgraf@suug.ch>
+Date: Tue, 23 Dec 2014 01:13:18 +0100
+Subject: net: Reset secmark when scrubbing packet
+
+From: Thomas Graf <tgraf@suug.ch>
+
+[ Upstream commit b8fb4e0648a2ab3734140342002f68fb0c7d1602 ]
+
+skb_scrub_packet() is called when a packet switches between a context
+such as between underlay and overlay, between namespaces, or between
+L3 subnets.
+
+While we already scrub the packet mark, connection tracking entry,
+and cached destination, the security mark/context is left intact.
+
+It seems wrong to inherit the security context of a packet when going
+from overlay to underlay or across forwarding paths.
+
+Signed-off-by: Thomas Graf <tgraf@suug.ch>
+Acked-by: Flavio Leitner <fbl@sysclose.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4040,6 +4040,7 @@ void skb_scrub_packet(struct sk_buff *sk
+       skb->ignore_df = 0;
+       skb_dst_drop(skb);
+       skb->mark = 0;
++      skb_init_secmark(skb);
+       secpath_reset(skb);
+       nf_reset(skb);
+       nf_reset_trace(skb);
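
A toy model of the scrub step above (not kernel code; the struct and its fields are simplified stand-ins): crossing a context boundary must clear every piece of per-context metadata, and the security mark now joins that set:

#include <stdint.h>
#include <stdio.h>

struct toy_skb {
        uint32_t mark;      /* packet mark, already cleared by the scrub */
        uint32_t secmark;   /* security mark, previously leaked across contexts */
        void    *dst;       /* cached route, already dropped by the scrub */
};

static void toy_scrub_packet(struct toy_skb *skb)
{
        skb->dst = NULL;
        skb->mark = 0;
        skb->secmark = 0;   /* the new step: do not inherit the security context */
}

int main(void)
{
        struct toy_skb skb = { .mark = 7, .secmark = 42, .dst = (void *)&skb };

        toy_scrub_packet(&skb);
        printf("mark=%u secmark=%u dst=%p\n", skb.mark, skb.secmark, skb.dst);
        return 0;
}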
diff --git a/queue-3.18/netlink-always-copy-on-mmap-tx.patch b/queue-3.18/netlink-always-copy-on-mmap-tx.patch
new file mode 100644 (file)
index 0000000..33292f6
--- /dev/null
@@ -0,0 +1,127 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: David Miller <davem@davemloft.net>
+Date: Tue, 16 Dec 2014 17:58:17 -0500
+Subject: netlink: Always copy on mmap TX.
+
+From: David Miller <davem@davemloft.net>
+
+[ Upstream commit 4682a0358639b29cf69437ed909c6221f8c89847 ]
+
+Checking the file f_count and the nlk->mapped count is not completely
+sufficient to prevent the mmap'd area contents from changing from
+under us during netlink mmap sendmsg() operations.
+
+Be careful to sample the header's length field only once, because this
+could change from under us as well.
+
+Fixes: 5fd96123ee19 ("netlink: implement memory mapped sendmsg()")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Daniel Borkmann <dborkman@redhat.com>
+Acked-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   52 ++++++++++++++---------------------------------
+ 1 file changed, 16 insertions(+), 36 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -526,14 +526,14 @@ out:
+       return err;
+ }
+-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
++static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
+ {
+ #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+       struct page *p_start, *p_end;
+       /* First page is flushed through netlink_{get,set}_status */
+       p_start = pgvec_to_page(hdr + PAGE_SIZE);
+-      p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
++      p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
+       while (p_start <= p_end) {
+               flush_dcache_page(p_start);
+               p_start++;
+@@ -715,24 +715,16 @@ static int netlink_mmap_sendmsg(struct s
+       struct nl_mmap_hdr *hdr;
+       struct sk_buff *skb;
+       unsigned int maxlen;
+-      bool excl = true;
+       int err = 0, len = 0;
+-      /* Netlink messages are validated by the receiver before processing.
+-       * In order to avoid userspace changing the contents of the message
+-       * after validation, the socket and the ring may only be used by a
+-       * single process, otherwise we fall back to copying.
+-       */
+-      if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 ||
+-          atomic_read(&nlk->mapped) > 1)
+-              excl = false;
+-
+       mutex_lock(&nlk->pg_vec_lock);
+       ring   = &nlk->tx_ring;
+       maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+       do {
++              unsigned int nm_len;
++
+               hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
+               if (hdr == NULL) {
+                       if (!(msg->msg_flags & MSG_DONTWAIT) &&
+@@ -740,35 +732,23 @@ static int netlink_mmap_sendmsg(struct s
+                               schedule();
+                       continue;
+               }
+-              if (hdr->nm_len > maxlen) {
++
++              nm_len = ACCESS_ONCE(hdr->nm_len);
++              if (nm_len > maxlen) {
+                       err = -EINVAL;
+                       goto out;
+               }
+-              netlink_frame_flush_dcache(hdr);
++              netlink_frame_flush_dcache(hdr, nm_len);
+-              if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
+-                      skb = alloc_skb_head(GFP_KERNEL);
+-                      if (skb == NULL) {
+-                              err = -ENOBUFS;
+-                              goto out;
+-                      }
+-                      sock_hold(sk);
+-                      netlink_ring_setup_skb(skb, sk, ring, hdr);
+-                      NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
+-                      __skb_put(skb, hdr->nm_len);
+-                      netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+-                      atomic_inc(&ring->pending);
+-              } else {
+-                      skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
+-                      if (skb == NULL) {
+-                              err = -ENOBUFS;
+-                              goto out;
+-                      }
+-                      __skb_put(skb, hdr->nm_len);
+-                      memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
+-                      netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
++              skb = alloc_skb(nm_len, GFP_KERNEL);
++              if (skb == NULL) {
++                      err = -ENOBUFS;
++                      goto out;
+               }
++              __skb_put(skb, nm_len);
++              memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
++              netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+               netlink_increment_head(ring);
+@@ -814,7 +794,7 @@ static void netlink_queue_mmaped_skb(str
+       hdr->nm_pid     = NETLINK_CB(skb).creds.pid;
+       hdr->nm_uid     = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+       hdr->nm_gid     = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+-      netlink_frame_flush_dcache(hdr);
++      netlink_frame_flush_dcache(hdr, hdr->nm_len);
+       netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+       NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
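
The "sample the length only once" rule from the changelog can be shown with a small userspace sketch; the frame layout and names below are invented for illustration and are not the netlink ring format:

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

#define MAXLEN 64

struct shared_frame {
        _Atomic unsigned int nm_len;   /* another thread may rewrite this at any time */
        unsigned char data[256];
};

static int copy_frame(struct shared_frame *hdr, unsigned char *dst)
{
        /* read the untrusted length exactly once */
        unsigned int nm_len = atomic_load_explicit(&hdr->nm_len, memory_order_relaxed);

        if (nm_len > MAXLEN)            /* bounds-check the sampled value ... */
                return -1;
        memcpy(dst, hdr->data, nm_len); /* ... and copy exactly that many bytes */
        return (int)nm_len;
}

int main(void)
{
        struct shared_frame f = { .nm_len = 16, .data = "sixteen bytes.." };
        unsigned char buf[MAXLEN];

        printf("copied %d bytes\n", copy_frame(&f, buf));
        return 0;
}

Because both the check and the copy use the same local, a concurrent rewrite of nm_len can no longer make the check and the use disagree.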
diff --git a/queue-3.18/netlink-don-t-reorder-loads-stores-before-marking-mmap-netlink-frame-as-available.patch b/queue-3.18/netlink-don-t-reorder-loads-stores-before-marking-mmap-netlink-frame-as-available.patch
new file mode 100644 (file)
index 0000000..817d463
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Thomas Graf <tgraf@suug.ch>
+Date: Thu, 18 Dec 2014 10:30:26 +0000
+Subject: netlink: Don't reorder loads/stores before marking mmap netlink frame as available
+
+From: Thomas Graf <tgraf@suug.ch>
+
+[ Upstream commit a18e6a186f53af06937a2c268c72443336f4ab56 ]
+
+Each mmap Netlink frame contains a status field which indicates
+whether the frame is unused, reserved, contains data or needs to
+be skipped. Loads and stores must not be reordered, and must have
+completed, before the status field is changed and another CPU might
+pick up the frame for use. Use an smp_mb() to cover the needs of both
+types of callers to netlink_set_status(): callers which have been
+reading data from the frame, and callers which have been
+filling or releasing and thus writing to the frame.
+
+- Example code path requiring a smp_rmb():
+  memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
+  netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+
+- Example code path requiring a smp_wmb():
+  hdr->nm_uid  = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+  hdr->nm_gid  = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+  netlink_frame_flush_dcache(hdr);
+  netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+
+Fixes: f9c228 ("netlink: implement memory mapped recvmsg()")
+Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -551,9 +551,9 @@ static enum nl_mmap_status netlink_get_s
+ static void netlink_set_status(struct nl_mmap_hdr *hdr,
+                              enum nl_mmap_status status)
+ {
++      smp_mb();
+       hdr->nm_status = status;
+       flush_dcache_page(pgvec_to_page(hdr));
+-      smp_wmb();
+ }
+ static struct nl_mmap_hdr *
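
A userspace analogue of the barrier placement above, with C11 atomics standing in for the kernel primitives (illustration only): every load and store touching the frame must complete before the status word allows another CPU to reuse it, so the full fence sits immediately before the status store:

#include <stdatomic.h>
#include <string.h>

enum frame_status { FRAME_VALID, FRAME_UNUSED };

struct frame {
        char payload[64];
        _Atomic int status;
};

static void consume_and_release(struct frame *f, char *out, size_t len)
{
        memcpy(out, f->payload, len);              /* reads from the frame ... */

        atomic_thread_fence(memory_order_seq_cst); /* plays the role of smp_mb() */
        atomic_store_explicit(&f->status, FRAME_UNUSED,
                              memory_order_relaxed); /* ... only now may it be reused */
}

int main(void)
{
        struct frame f = { .payload = "hello", .status = FRAME_VALID };
        char out[64];

        consume_and_release(&f, out, sizeof(out));
        return 0;
}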
diff --git a/queue-3.18/tcp-do-not-apply-tso-segment-limit-to-non-tso-packets.patch b/queue-3.18/tcp-do-not-apply-tso-segment-limit-to-non-tso-packets.patch
new file mode 100644 (file)
index 0000000..46e69d3
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Thu, 1 Jan 2015 00:39:23 +1100
+Subject: tcp: Do not apply TSO segment limit to non-TSO packets
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit 843925f33fcc293d80acf2c5c8a78adf3344d49b ]
+
+Thomas Jarosch reported IPsec TCP stalls when a PMTU event occurs.
+
+In fact the problem was completely unrelated to IPsec.  The bug is
+also reproducible if you just disable TSO/GSO.
+
+The problem is that when the MSS goes down, existing queued packets
+on the TX queue that have not been transmitted yet all look like
+TSO packets and get treated as such.
+
+This then triggers a bug where tcp_mss_split_point tells us to
+generate a zero-sized packet on the TX queue.  Once that happens
+we're screwed because the zero-sized packet can never be removed
+by ACKs.
+
+Fixes: 1485348d242 ("tcp: Apply device TSO segment limit earlier")
+Reported-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+
+Cheers,
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1984,7 +1984,7 @@ static bool tcp_write_xmit(struct sock *
+               if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+                       break;
+-              if (tso_segs == 1) {
++              if (tso_segs == 1 || !sk->sk_gso_max_segs) {
+                       if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
+                                                    (tcp_skb_is_last(sk, skb) ?
+                                                     nonagle : TCP_NAGLE_PUSH))))
+@@ -2020,7 +2020,7 @@ static bool tcp_write_xmit(struct sock *
+               }
+               limit = mss_now;
+-              if (tso_segs > 1 && !tcp_urg_mode(tp))
++              if (tso_segs > 1 && sk->sk_gso_max_segs && !tcp_urg_mode(tp))
+                       limit = tcp_mss_split_point(sk, skb, mss_now,
+                                                   min_t(unsigned int,
+                                                         cwnd_quota,
diff --git a/queue-3.18/tcp6-don-t-move-ip6cb-before-xfrm6_policy_check.patch b/queue-3.18/tcp6-don-t-move-ip6cb-before-xfrm6_policy_check.patch
new file mode 100644 (file)
index 0000000..73470de
--- /dev/null
@@ -0,0 +1,109 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Mon, 22 Dec 2014 18:22:48 +0100
+Subject: tcp6: don't move IP6CB before xfrm6_policy_check()
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 2dc49d1680b534877fd20cce52557ea542bb06b6 ]
+
+When xfrm6_policy_check() is used, _decode_session6() is called after some
+intermediate functions. This function uses IP6CB(), thus TCP_SKB_CB() must be
+prepared after the call of xfrm6_policy_check().
+
+Before this patch, scenarios with IPv6 + TCP + IPsec transport mode were broken.
+
+Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
+Reported-by: Huaibin Wang <huaibin.wang@6wind.com>
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/tcp_ipv6.c |   45 +++++++++++++++++++++++++++++----------------
+ 1 file changed, 29 insertions(+), 16 deletions(-)
+
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1385,6 +1385,28 @@ ipv6_pktoptions:
+       return 0;
+ }
++static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
++                         const struct tcphdr *th)
++{
++      /* This is tricky: we move IP6CB at its correct location into
++       * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
++       * _decode_session6() uses IP6CB().
++       * barrier() makes sure compiler won't play aliasing games.
++       */
++      memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
++              sizeof(struct inet6_skb_parm));
++      barrier();
++
++      TCP_SKB_CB(skb)->seq = ntohl(th->seq);
++      TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
++                                  skb->len - th->doff*4);
++      TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
++      TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
++      TCP_SKB_CB(skb)->tcp_tw_isn = 0;
++      TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
++      TCP_SKB_CB(skb)->sacked = 0;
++}
++
+ static int tcp_v6_rcv(struct sk_buff *skb)
+ {
+       const struct tcphdr *th;
+@@ -1416,24 +1438,9 @@ static int tcp_v6_rcv(struct sk_buff *sk
+       th = tcp_hdr(skb);
+       hdr = ipv6_hdr(skb);
+-      /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+-       * barrier() makes sure compiler wont play fool^Waliasing games.
+-       */
+-      memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
+-              sizeof(struct inet6_skb_parm));
+-      barrier();
+-
+-      TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+-      TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+-                                  skb->len - th->doff*4);
+-      TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+-      TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+-      TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+-      TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
+-      TCP_SKB_CB(skb)->sacked = 0;
+       sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
+-                              tcp_v6_iif(skb));
++                              inet6_iif(skb));
+       if (!sk)
+               goto no_tcp_socket;
+@@ -1449,6 +1456,8 @@ process:
+       if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+               goto discard_and_relse;
++      tcp_v6_fill_cb(skb, hdr, th);
++
+ #ifdef CONFIG_TCP_MD5SIG
+       if (tcp_v6_inbound_md5_hash(sk, skb))
+               goto discard_and_relse;
+@@ -1480,6 +1489,8 @@ no_tcp_socket:
+       if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+               goto discard_it;
++      tcp_v6_fill_cb(skb, hdr, th);
++
+       if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ csum_error:
+               TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
+@@ -1503,6 +1514,8 @@ do_time_wait:
+               goto discard_it;
+       }
++      tcp_v6_fill_cb(skb, hdr, th);
++
+       if (skb->len < (th->doff<<2)) {
+               inet_twsk_put(inet_twsk(sk));
+               goto bad_packet;
diff --git a/queue-3.18/team-avoid-possible-underflow-of-count_pending-value-for-notify_peers-and-mcast_rejoin.patch b/queue-3.18/team-avoid-possible-underflow-of-count_pending-value-for-notify_peers-and-mcast_rejoin.patch
new file mode 100644 (file)
index 0000000..a87f152
--- /dev/null
@@ -0,0 +1,94 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jiri Pirko <jiri@resnulli.us>
+Date: Wed, 14 Jan 2015 18:15:30 +0100
+Subject: team: avoid possible underflow of count_pending value for notify_peers and mcast_rejoin
+
+From: Jiri Pirko <jiri@resnulli.us>
+
+[ Upstream commit b0d11b42785b70e19bc6a3122eead3f7969a7589 ]
+
+This patch fixes a race condition that may set count_pending
+to -1, which results in an unwanted large burst of ARP messages
+(in the "notify peers" case).
+
+Consider following scenario:
+
+count_pending == 2
+   CPU0                                           CPU1
+                                       team_notify_peers_work
+                                         atomic_dec_and_test (dec count_pending to 1)
+                                         schedule_delayed_work
+ team_notify_peers
+   atomic_add (adding 1 to count_pending)
+                                       team_notify_peers_work
+                                         atomic_dec_and_test (dec count_pending to 1)
+                                         schedule_delayed_work
+                                       team_notify_peers_work
+                                         atomic_dec_and_test (dec count_pending to 0)
+   schedule_delayed_work
+                                       team_notify_peers_work
+                                         atomic_dec_and_test (dec count_pending to -1)
+
+Fix this race by using atomic_dec_if_positive - that will prevent
+count_pending from dropping below 0.
+
+Fixes: fc423ff00df3a1955441 ("team: add peer notification")
+Fixes: 492b200efdd20b8fcfd  ("team: add support for sending multicast rejoins")
+Signed-off-by: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: Jiri Benc <jbenc@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/team/team.c |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/team/team.c
++++ b/drivers/net/team/team.c
+@@ -629,6 +629,7 @@ static int team_change_mode(struct team
+ static void team_notify_peers_work(struct work_struct *work)
+ {
+       struct team *team;
++      int val;
+       team = container_of(work, struct team, notify_peers.dw.work);
+@@ -636,9 +637,14 @@ static void team_notify_peers_work(struc
+               schedule_delayed_work(&team->notify_peers.dw, 0);
+               return;
+       }
++      val = atomic_dec_if_positive(&team->notify_peers.count_pending);
++      if (val < 0) {
++              rtnl_unlock();
++              return;
++      }
+       call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, team->dev);
+       rtnl_unlock();
+-      if (!atomic_dec_and_test(&team->notify_peers.count_pending))
++      if (val)
+               schedule_delayed_work(&team->notify_peers.dw,
+                                     msecs_to_jiffies(team->notify_peers.interval));
+ }
+@@ -669,6 +675,7 @@ static void team_notify_peers_fini(struc
+ static void team_mcast_rejoin_work(struct work_struct *work)
+ {
+       struct team *team;
++      int val;
+       team = container_of(work, struct team, mcast_rejoin.dw.work);
+@@ -676,9 +683,14 @@ static void team_mcast_rejoin_work(struc
+               schedule_delayed_work(&team->mcast_rejoin.dw, 0);
+               return;
+       }
++      val = atomic_dec_if_positive(&team->mcast_rejoin.count_pending);
++      if (val < 0) {
++              rtnl_unlock();
++              return;
++      }
+       call_netdevice_notifiers(NETDEV_RESEND_IGMP, team->dev);
+       rtnl_unlock();
+-      if (!atomic_dec_and_test(&team->mcast_rejoin.count_pending))
++      if (val)
+               schedule_delayed_work(&team->mcast_rejoin.dw,
+                                     msecs_to_jiffies(team->mcast_rejoin.interval));
+ }
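
A userspace sketch of the atomic_dec_if_positive() pattern the fix relies on, with C11 atomics standing in for the kernel's atomic_t (the counter name merely mirrors the changelog): the decrement happens only while the counter is still positive, and the return value tells the caller whether to reschedule:

#include <stdatomic.h>
#include <stdio.h>

/* Approximation of atomic_dec_if_positive(): returns the decremented value,
 * or a negative number if the counter was already zero and was left alone. */
static int dec_if_positive(_Atomic int *v)
{
        int old = atomic_load(v);

        while (old > 0 &&
               !atomic_compare_exchange_weak(v, &old, old - 1))
                ;               /* CAS failure reloads 'old'; retry */
        return old - 1;
}

int main(void)
{
        _Atomic int count_pending = 2;

        printf("%d\n", dec_if_positive(&count_pending)); /*  1: work remains, reschedule */
        printf("%d\n", dec_if_positive(&count_pending)); /*  0: this was the last pending run */
        printf("%d\n", dec_if_positive(&count_pending)); /* -1: nothing pending, bail out early */
        return 0;
}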
diff --git a/queue-3.18/tg3-tg3_disable_ints-using-uninitialized-mailbox-value-to-disable-interrupts.patch b/queue-3.18/tg3-tg3_disable_ints-using-uninitialized-mailbox-value-to-disable-interrupts.patch
new file mode 100644 (file)
index 0000000..601080c
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Prashant Sreedharan <prashant@broadcom.com>
+Date: Sat, 20 Dec 2014 12:16:17 -0800
+Subject: tg3: tg3_disable_ints using uninitialized mailbox value to disable interrupts
+
+From: Prashant Sreedharan <prashant@broadcom.com>
+
+[ Upstream commit 05b0aa579397b734f127af58e401a30784a1e315 ]
+
+During driver load in tg3_init_one, if the driver detects DMA activity before
+initializing the chip, tg3_halt is called. As part of tg3_halt, interrupts are
+disabled using the routine tg3_disable_ints. This routine was using a mailbox
+value which was not initialized (default value is 0). As a result the driver was
+writing 0x00000001 to PCI config space register 0, which is the vendor ID / device ID.
+
+This driver bug was exposed by commit a7877b17a667 (PCI: Check only
+the Vendor ID to identify Configuration Request Retry). The issue is only
+seen in older generation chipsets like the 5722, because a config space write
+to offset 0 from the driver is possible; newer generation chips ignore writes
+to offset 0. Also, without commit a7877b17a667, the bootcode on these older
+chips would reprogram the vendor ID/device ID when a GRC reset was issued,
+which is the reason this bug was masked earlier.
+
+Fixed by initializing the interrupt mailbox registers before calling tg3_halt.
+
+Please queue for -stable.
+
+Reported-by: Nils Holland <nholland@tisys.org>
+Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: Prashant Sreedharan <prashant@broadcom.com>
+Signed-off-by: Michael Chan <mchan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c |   34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -17789,23 +17789,6 @@ static int tg3_init_one(struct pci_dev *
+               goto err_out_apeunmap;
+       }
+-      /*
+-       * Reset chip in case UNDI or EFI driver did not shutdown
+-       * DMA self test will enable WDMAC and we'll see (spurious)
+-       * pending DMA on the PCI bus at that point.
+-       */
+-      if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) ||
+-          (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
+-              tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
+-              tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
+-      }
+-
+-      err = tg3_test_dma(tp);
+-      if (err) {
+-              dev_err(&pdev->dev, "DMA engine test failed, aborting\n");
+-              goto err_out_apeunmap;
+-      }
+-
+       intmbx = MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW;
+       rcvmbx = MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW;
+       sndmbx = MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW;
+@@ -17850,6 +17833,23 @@ static int tg3_init_one(struct pci_dev *
+                       sndmbx += 0xc;
+       }
++      /*
++       * Reset chip in case UNDI or EFI driver did not shutdown
++       * DMA self test will enable WDMAC and we'll see (spurious)
++       * pending DMA on the PCI bus at that point.
++       */
++      if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) ||
++          (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
++              tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
++              tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
++      }
++
++      err = tg3_test_dma(tp);
++      if (err) {
++              dev_err(&pdev->dev, "DMA engine test failed, aborting\n");
++              goto err_out_apeunmap;
++      }
++
+       tg3_init_coal(tp);
+       pci_set_drvdata(pdev, dev);
diff --git a/queue-3.18/xen-netback-fixing-the-propagation-of-the-transmit-shaper-timeout.patch b/queue-3.18/xen-netback-fixing-the-propagation-of-the-transmit-shaper-timeout.patch
new file mode 100644 (file)
index 0000000..4e61228
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: "Palik, Imre" <imrep@amazon.de>
+Date: Tue, 6 Jan 2015 16:44:44 +0100
+Subject: xen-netback: fixing the propagation of the transmit shaper timeout
+
+From: "Palik, Imre" <imrep@amazon.de>
+
+[ Upstream commit 07ff890daeda31cf23173865edf50bcb03e100c3 ]
+
+Since e9ce7cb6b107 ("xen-netback: Factor queue-specific data into queue struct"),
+the transmit shaper timeout is always set to 0.  The value the user sets via
+xenbus is never propagated to the transmit shaper.
+
+This patch fixes the issue.
+
+Cc: Anthony Liguori <aliguori@amazon.com>
+Signed-off-by: Imre Palik <imrep@amazon.de>
+Acked-by: Ian Campbell <ian.campbell@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netback/xenbus.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/xen-netback/xenbus.c
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -736,6 +736,7 @@ static void connect(struct backend_info
+               }
+               queue->remaining_credit = credit_bytes;
++              queue->credit_usec = credit_usec;
+               err = connect_rings(be, queue);
+               if (err) {
diff --git a/queue-3.18/xen-netback-support-frontends-without-feature-rx-notify-again.patch b/queue-3.18/xen-netback-support-frontends-without-feature-rx-notify-again.patch
new file mode 100644 (file)
index 0000000..37833e8
--- /dev/null
@@ -0,0 +1,180 @@
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: David Vrabel <david.vrabel@citrix.com>
+Date: Thu, 18 Dec 2014 11:13:06 +0000
+Subject: xen-netback: support frontends without feature-rx-notify again
+
+From: David Vrabel <david.vrabel@citrix.com>
+
+[ Upstream commit 26c0e102585d5a4d311f5d6eb7f524d288e7f6b7 ]
+
+Commit bc96f648df1bbc2729abbb84513cf4f64273a1f1 (xen-netback: make
+feature-rx-notify mandatory) incorrectly assumed that there were no
+frontends in use that did not support this feature.  But the frontend
+driver in MiniOS does not, and since it is used by (qemu) stubdoms,
+these stopped working.
+
+Netback sort of works as-is in this mode except:
+
+- If there are no Rx requests and the internal Rx queue fills, only
+  the drain timeout will wake the thread.  The default drain timeout
+  of 10 s would give unacceptable pauses.
+
+- If an Rx stall was detected and the internal Rx queue is drained,
+  then the Rx thread would never wake.
+
+Handle these two cases (when feature-rx-notify is disabled) by:
+
+- Reducing the drain timeout to 30 ms.
+
+- Disabling Rx stall detection.
+
+Reported-by: John <jw@nuclearfallout.net>
+Tested-by: John <jw@nuclearfallout.net>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Reviewed-by: Wei Liu <wei.liu2@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netback/common.h    |    4 +++-
+ drivers/net/xen-netback/interface.c |    4 +++-
+ drivers/net/xen-netback/netback.c   |   27 ++++++++++++++-------------
+ drivers/net/xen-netback/xenbus.c    |   12 +++++++++---
+ 4 files changed, 29 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -230,6 +230,8 @@ struct xenvif {
+        */
+       bool disabled;
+       unsigned long status;
++      unsigned long drain_timeout;
++      unsigned long stall_timeout;
+       /* Queues */
+       struct xenvif_queue *queues;
+@@ -328,7 +330,7 @@ irqreturn_t xenvif_interrupt(int irq, vo
+ extern bool separate_tx_rx_irq;
+ extern unsigned int rx_drain_timeout_msecs;
+-extern unsigned int rx_drain_timeout_jiffies;
++extern unsigned int rx_stall_timeout_msecs;
+ extern unsigned int xenvif_max_queues;
+ #ifdef CONFIG_DEBUG_FS
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -166,7 +166,7 @@ static int xenvif_start_xmit(struct sk_b
+               goto drop;
+       cb = XENVIF_RX_CB(skb);
+-      cb->expires = jiffies + rx_drain_timeout_jiffies;
++      cb->expires = jiffies + vif->drain_timeout;
+       xenvif_rx_queue_tail(queue, skb);
+       xenvif_kick_thread(queue);
+@@ -414,6 +414,8 @@ struct xenvif *xenvif_alloc(struct devic
+       vif->ip_csum = 1;
+       vif->dev = dev;
+       vif->disabled = false;
++      vif->drain_timeout = msecs_to_jiffies(rx_drain_timeout_msecs);
++      vif->stall_timeout = msecs_to_jiffies(rx_stall_timeout_msecs);
+       /* Start out with no queues. */
+       vif->queues = NULL;
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -60,14 +60,12 @@ module_param(separate_tx_rx_irq, bool, 0
+  */
+ unsigned int rx_drain_timeout_msecs = 10000;
+ module_param(rx_drain_timeout_msecs, uint, 0444);
+-unsigned int rx_drain_timeout_jiffies;
+ /* The length of time before the frontend is considered unresponsive
+  * because it isn't providing Rx slots.
+  */
+-static unsigned int rx_stall_timeout_msecs = 60000;
++unsigned int rx_stall_timeout_msecs = 60000;
+ module_param(rx_stall_timeout_msecs, uint, 0444);
+-static unsigned int rx_stall_timeout_jiffies;
+ unsigned int xenvif_max_queues;
+ module_param_named(max_queues, xenvif_max_queues, uint, 0644);
+@@ -2022,7 +2020,7 @@ static bool xenvif_rx_queue_stalled(stru
+       return !queue->stalled
+               && prod - cons < XEN_NETBK_RX_SLOTS_MAX
+               && time_after(jiffies,
+-                            queue->last_rx_time + rx_stall_timeout_jiffies);
++                            queue->last_rx_time + queue->vif->stall_timeout);
+ }
+ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
+@@ -2040,8 +2038,9 @@ static bool xenvif_have_rx_work(struct x
+ {
+       return (!skb_queue_empty(&queue->rx_queue)
+               && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
+-              || xenvif_rx_queue_stalled(queue)
+-              || xenvif_rx_queue_ready(queue)
++              || (queue->vif->stall_timeout &&
++                  (xenvif_rx_queue_stalled(queue)
++                   || xenvif_rx_queue_ready(queue)))
+               || kthread_should_stop()
+               || queue->vif->disabled;
+ }
+@@ -2094,6 +2093,9 @@ int xenvif_kthread_guest_rx(void *data)
+       struct xenvif_queue *queue = data;
+       struct xenvif *vif = queue->vif;
++      if (!vif->stall_timeout)
++              xenvif_queue_carrier_on(queue);
++
+       for (;;) {
+               xenvif_wait_for_rx_work(queue);
+@@ -2120,10 +2122,12 @@ int xenvif_kthread_guest_rx(void *data)
+                * while it's probably not responsive, drop the
+                * carrier so packets are dropped earlier.
+                */
+-              if (xenvif_rx_queue_stalled(queue))
+-                      xenvif_queue_carrier_off(queue);
+-              else if (xenvif_rx_queue_ready(queue))
+-                      xenvif_queue_carrier_on(queue);
++              if (vif->stall_timeout) {
++                      if (xenvif_rx_queue_stalled(queue))
++                              xenvif_queue_carrier_off(queue);
++                      else if (xenvif_rx_queue_ready(queue))
++                              xenvif_queue_carrier_on(queue);
++              }
+               /* Queued packets may have foreign pages from other
+                * domains.  These cannot be queued indefinitely as
+@@ -2194,9 +2198,6 @@ static int __init netback_init(void)
+       if (rc)
+               goto failed_init;
+-      rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+-      rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
+-
+ #ifdef CONFIG_DEBUG_FS
+       xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
+       if (IS_ERR_OR_NULL(xen_netback_dbg_root))
+--- a/drivers/net/xen-netback/xenbus.c
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -886,9 +886,15 @@ static int read_xenbus_vif_flags(struct
+               return -EOPNOTSUPP;
+       if (xenbus_scanf(XBT_NIL, dev->otherend,
+-                       "feature-rx-notify", "%d", &val) < 0 || val == 0) {
+-              xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory");
+-              return -EINVAL;
++                       "feature-rx-notify", "%d", &val) < 0)
++              val = 0;
++      if (!val) {
++              /* - Reduce drain timeout to poll more frequently for
++               *   Rx requests.
++               * - Disable Rx stall detection.
++               */
++              be->vif->drain_timeout = msecs_to_jiffies(30);
++              be->vif->stall_timeout = 0;
+       }
+       if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",