--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 11 Jan 2015 10:32:18 -0800
+Subject: alx: fix alx_poll()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7a05dc64e2e4c611d89007b125b20c0d2a4d31a5 ]
+
+Commit d75b1ade567f ("net: less interrupt masking in NAPI") uncovered
+wrong alx_poll() behavior.
+
+A NAPI poll() handler is supposed to return exactly the budget when/if
+napi_complete() has not been called.
+
+It is also supposed to return number of frames that were received, so
+that netdev_budget can have a meaning.
+
+Also, in case of TX pressure, we still have to dequeue received
+packets: alx_clean_rx_irq() has to be called even if
+alx_clean_tx_irq(alx) returns false; otherwise the device is effectively
+half duplex.
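+
+Purely as an illustration, the contract a NAPI poll() handler is expected to
+follow looks roughly like this (clean_rx()/clean_tx() are hypothetical
+stand-ins for the driver-specific cleanup routines, not alx functions):
+
+    static int example_poll(struct napi_struct *napi, int budget)
+    {
+        int work = clean_rx(napi, budget);  /* always drain RX */
+        bool tx_done = clean_tx(napi);
+
+        if (!tx_done || work == budget)
+            return budget;   /* more work pending: stay in polling mode */
+
+        napi_complete(napi);
+        /* re-enable device interrupts here */
+        return work;         /* number of frames actually received */
+    }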
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Fixes: d75b1ade567f ("net: less interrupt masking in NAPI")
+Reported-by: Oded Gabbay <oded.gabbay@amd.com>
+Bisected-by: Oded Gabbay <oded.gabbay@amd.com>
+Tested-by: Oded Gabbay <oded.gabbay@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/atheros/alx/main.c | 24 +++++++++++++-----------
+ 1 file changed, 13 insertions(+), 11 deletions(-)
+
+--- a/drivers/net/ethernet/atheros/alx/main.c
++++ b/drivers/net/ethernet/atheros/alx/main.c
+@@ -184,15 +184,16 @@ static void alx_schedule_reset(struct al
+ schedule_work(&alx->reset_wk);
+ }
+
+-static bool alx_clean_rx_irq(struct alx_priv *alx, int budget)
++static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
+ {
+ struct alx_rx_queue *rxq = &alx->rxq;
+ struct alx_rrd *rrd;
+ struct alx_buffer *rxb;
+ struct sk_buff *skb;
+ u16 length, rfd_cleaned = 0;
++ int work = 0;
+
+- while (budget > 0) {
++ while (work < budget) {
+ rrd = &rxq->rrd[rxq->rrd_read_idx];
+ if (!(rrd->word3 & cpu_to_le32(1 << RRD_UPDATED_SHIFT)))
+ break;
+@@ -203,7 +204,7 @@ static bool alx_clean_rx_irq(struct alx_
+ ALX_GET_FIELD(le32_to_cpu(rrd->word0),
+ RRD_NOR) != 1) {
+ alx_schedule_reset(alx);
+- return 0;
++ return work;
+ }
+
+ rxb = &rxq->bufs[rxq->read_idx];
+@@ -243,7 +244,7 @@ static bool alx_clean_rx_irq(struct alx_
+ }
+
+ napi_gro_receive(&alx->napi, skb);
+- budget--;
++ work++;
+
+ next_pkt:
+ if (++rxq->read_idx == alx->rx_ringsz)
+@@ -258,21 +259,22 @@ next_pkt:
+ if (rfd_cleaned)
+ alx_refill_rx_ring(alx, GFP_ATOMIC);
+
+- return budget > 0;
++ return work;
+ }
+
+ static int alx_poll(struct napi_struct *napi, int budget)
+ {
+ struct alx_priv *alx = container_of(napi, struct alx_priv, napi);
+ struct alx_hw *hw = &alx->hw;
+- bool complete = true;
+ unsigned long flags;
++ bool tx_complete;
++ int work;
+
+- complete = alx_clean_tx_irq(alx) &&
+- alx_clean_rx_irq(alx, budget);
++ tx_complete = alx_clean_tx_irq(alx);
++ work = alx_clean_rx_irq(alx, budget);
+
+- if (!complete)
+- return 1;
++ if (!tx_complete || work == budget)
++ return budget;
+
+ napi_complete(&alx->napi);
+
+@@ -284,7 +286,7 @@ static int alx_poll(struct napi_struct *
+
+ alx_post_write(hw);
+
+- return 0;
++ return work;
+ }
+
+ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Antonio Quartulli <antonio@meshcoding.com>
+Date: Sat, 20 Dec 2014 13:48:57 +0100
+Subject: batman-adv: avoid NULL dereferences and fix if check
+
+From: Antonio Quartulli <antonio@meshcoding.com>
+
+[ Upstream commit 0d1644919578db525b9a7b6c8197ce02adbfce26 ]
+
+Gateways having bandwidth_down equal to zero are not accepted
+at all and so are never added to the Gateway list.
+For this reason, checking the bandwidth_down member in
+batadv_gw_out_of_range() is useless.
+
+This is probably a copy/paste error and this check was supposed
+to be "!gw_node" only. Moreover, the way the check is written
+now may also lead to a NULL dereference.
+
+Fix this by rewriting the if-condition properly.
+
+Introduced by 414254e342a0d58144de40c3da777521ebaeeb07
+("batman-adv: tvlv - gateway download/upload bandwidth container")
+
+Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
+Reported-by: David Binderman <dcb314@hotmail.com>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/gateway_client.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/batman-adv/gateway_client.c
++++ b/net/batman-adv/gateway_client.c
+@@ -810,7 +810,7 @@ bool batadv_gw_out_of_range(struct batad
+ goto out;
+
+ gw_node = batadv_gw_node_get(bat_priv, orig_dst_node);
+- if (!gw_node->bandwidth_down == 0)
++ if (!gw_node)
+ goto out;
+
+ switch (atomic_read(&bat_priv->gw_mode)) {
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Sven Eckelmann <sven@narfation.org>
+Date: Sat, 20 Dec 2014 13:48:55 +0100
+Subject: batman-adv: Calculate extra tail size based on queued fragments
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit 5b6698b0e4a37053de35cc24ee695b98a7eb712b ]
+
+The fragmentation code was replaced in 610bfc6bc99bc83680d190ebc69359a05fc7f605
+("batman-adv: Receive fragmented packets and merge"). The new code provided a
+mostly unused parameter skb for the merging function. It is used inside the
+function to calculate the additionally needed skb tailroom. But instead of
+increasing its own tailroom, it is only increasing the tailroom of the first
+queued skb. This is not correct in some situations because the first queued
+entry can be a different skb than the one passed as the parameter.
+
+An observed problem was:
+
+1. packet with size 104, total_size 1464, fragno 1 was received
+ - packet is queued
+2. packet with size 1400, total_size 1464, fragno 0 was received
+ - packet is queued at the end of the list
+3. enough data was received and can be given to the merge function
+ (1464 == (1400 - 20) + (104 - 20))
+ - merge functions gets 1400 byte large packet as skb argument
+4. merge function gets first entry in queue (104 byte)
+ - stored as skb_out
+5. merge function calculates the required extra tail as total_size - skb->len
+   - pskb_expand_head grows the tail of skb_out by 64 bytes
+6. merge function tries to squeeze the extra 1380 bytes from the second queued
+   skb (1400 byte aka skb parameter) into the 64 extra tail bytes of skb_out
+
+Instead calculate the extra required tail bytes for skb_out also using skb_out
+instead of using the parameter skb. The skb parameter is only used to get the
+total_size from the last received packet. This is also the total_size used to
+decide that all fragments were received.
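+
+In other words, the merge is expected to grow skb_out's own tailroom
+(simplified sketch; total_size stands for the value taken from the last
+received fragment, see the actual one-line change below):
+
+    /* skb_out is the first queued fragment */
+    if (pskb_expand_head(skb_out, 0, total_size - skb_out->len, GFP_ATOMIC) < 0)
+        goto free;   /* expand skb_out's tail, not the tail of the skb argument */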
+
+Reported-by: Philipp Psurek <philipp.psurek@gmail.com>
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Acked-by: Martin Hundebøll <martin@hundeboll.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/fragmentation.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/batman-adv/fragmentation.c
++++ b/net/batman-adv/fragmentation.c
+@@ -251,7 +251,7 @@ batadv_frag_merge_packets(struct hlist_h
+ kfree(entry);
+
+ /* Make room for the rest of the fragments. */
+- if (pskb_expand_head(skb_out, 0, size - skb->len, GFP_ATOMIC) < 0) {
++ if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
+ kfree_skb(skb_out);
+ skb_out = NULL;
+ goto free;
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Sven Eckelmann <sven@narfation.org>
+Date: Sat, 20 Dec 2014 13:48:56 +0100
+Subject: batman-adv: Unify fragment size calculation
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit 0402e444cd199389b7fe47be68a67b817e09e097 ]
+
+The fragmentation code was replaced in 610bfc6bc99bc83680d190ebc69359a05fc7f605
+("batman-adv: Receive fragmented packets and merge") by an implementation which
+can handle up to 16 fragments of a packet. The packet is prepared for the split
+in fragments by the function batadv_frag_send_packet and the actual split is
+done by batadv_frag_create.
+
+Both functions calculate the size of a fragment themselves. But their
+calculation differs because batadv_frag_send_packet also subtracts ETH_HLEN.
+Therefore, the check in batadv_frag_send_packet "can a full fragment be
+created?" may return true even when batadv_frag_create cannot create a full
+fragment.
+
+The function batadv_frag_create doesn't check the size of the skb before
+splitting it and therefore might try to create a fragment larger than the
+remaining buffer. This creates an integer underflow and an invalid len is given
+to skb_split.
+
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/fragmentation.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/batman-adv/fragmentation.c
++++ b/net/batman-adv/fragmentation.c
+@@ -434,7 +434,7 @@ bool batadv_frag_send_packet(struct sk_b
+ * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
+ */
+ mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
+- max_fragment_size = (mtu - header_size - ETH_HLEN);
++ max_fragment_size = mtu - header_size;
+ max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
+
+ /* Don't even try to fragment, if we need more than 16 fragments */
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Govindarajulu Varadarajan <_govind@gmx.com>
+Date: Thu, 18 Dec 2014 15:58:42 +0530
+Subject: enic: fix rx skb checksum
+
+From: Govindarajulu Varadarajan <_govind@gmx.com>
+
+[ Upstream commit 17e96834fd35997ca7cdfbf15413bcd5a36ad448 ]
+
+Hardware always provides the complement of the IP pseudo checksum. The stack
+expects the whole-packet checksum, without the pseudo checksum, when
+CHECKSUM_COMPLETE is set.
+
+This causes checksum errors in nf & ovs.
+
+kernel: qg-19546f09-f2: hw csum failure
+kernel: CPU: 9 PID: 0 Comm: swapper/9 Tainted: GF O-------------- 3.10.0-123.8.1.el7.x86_64 #1
+kernel: Hardware name: Cisco Systems Inc UCSB-B200-M3/UCSB-B200-M3, BIOS B200M3.2.2.3.0.080820141339 08/08/2014
+kernel: ffff881218f40000 df68243feb35e3a8 ffff881237a43ab8 ffffffff815e237b
+kernel: ffff881237a43ad0 ffffffff814cd4ca ffff8829ec71eb00 ffff881237a43af0
+kernel: ffffffff814c6232 0000000000000286 ffff8829ec71eb00 ffff881237a43b00
+kernel: Call Trace:
+kernel: <IRQ> [<ffffffff815e237b>] dump_stack+0x19/0x1b
+kernel: [<ffffffff814cd4ca>] netdev_rx_csum_fault+0x3a/0x40
+kernel: [<ffffffff814c6232>] __skb_checksum_complete_head+0x62/0x70
+kernel: [<ffffffff814c6251>] __skb_checksum_complete+0x11/0x20
+kernel: [<ffffffff8155a20c>] nf_ip_checksum+0xcc/0x100
+kernel: [<ffffffffa049edc7>] icmp_error+0x1f7/0x35c [nf_conntrack_ipv4]
+kernel: [<ffffffff814cf419>] ? netif_rx+0xb9/0x1d0
+kernel: [<ffffffffa040eb7b>] ? internal_dev_recv+0xdb/0x130 [openvswitch]
+kernel: [<ffffffffa04c8330>] nf_conntrack_in+0xf0/0xa80 [nf_conntrack]
+kernel: [<ffffffff81509380>] ? inet_del_offload+0x40/0x40
+kernel: [<ffffffffa049e302>] ipv4_conntrack_in+0x22/0x30 [nf_conntrack_ipv4]
+kernel: [<ffffffff815005ca>] nf_iterate+0xaa/0xc0
+kernel: [<ffffffff81509380>] ? inet_del_offload+0x40/0x40
+kernel: [<ffffffff81500664>] nf_hook_slow+0x84/0x140
+kernel: [<ffffffff81509380>] ? inet_del_offload+0x40/0x40
+kernel: [<ffffffff81509dd4>] ip_rcv+0x344/0x380
+
+Hardware verifies the IP & tcp/udp header checksums but does not provide the
+payload checksum, so use CHECKSUM_UNNECESSARY. Set it only if it is a valid
+IP tcp/udp packet.
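+
+As a rough reminder of the two conventions involved (illustrative comments
+only; the code lines mirror the change below):
+
+    /* CHECKSUM_COMPLETE:    skb->csum must hold the checksum of the whole
+     *                       packet, with no pseudo header folded in.
+     * CHECKSUM_UNNECESSARY: the hardware already validated the checksums,
+     *                       no checksum value is handed to the stack.
+     */
+    if ((netdev->features & NETIF_F_RXCSUM) && tcp_udp_csum_ok && ipv4_csum_ok)
+        skb->ip_summed = CHECKSUM_UNNECESSARY;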
+
+Cc: Jiri Benc <jbenc@redhat.com>
+Cc: Stefan Assmann <sassmann@redhat.com>
+Reported-by: Sunil Choudhary <schoudha@redhat.com>
+Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
+Reviewed-by: Jiri Benc <jbenc@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cisco/enic/enic_main.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/cisco/enic/enic_main.c
++++ b/drivers/net/ethernet/cisco/enic/enic_main.c
+@@ -1059,10 +1059,14 @@ static void enic_rq_indicate_buf(struct
+ PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+ }
+
+- if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc) {
+- skb->csum = htons(checksum);
+- skb->ip_summed = CHECKSUM_COMPLETE;
+- }
++ /* Hardware does not provide whole packet checksum. It only
++ * provides pseudo checksum. Since hw validates the packet
++ * checksum but not provide us the checksum value. use
++ * CHECSUM_UNNECESSARY.
++ */
++ if ((netdev->features & NETIF_F_RXCSUM) && tcp_udp_csum_ok &&
++ ipv4_csum_ok)
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ if (vlan_stripped)
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jesse Gross <jesse@nicira.com>
+Date: Tue, 16 Dec 2014 18:25:32 -0800
+Subject: geneve: Fix races between socket add and release.
+
+From: Jesse Gross <jesse@nicira.com>
+
+[ Upstream commit 12069401d895ff84076a50189ca842c0696b84b2 ]
+
+Currently, searching for a socket to add a reference to is not
+synchronized with deletion of sockets. This can result in use
+after free if there is another operation that is removing a
+socket at the same time. Solving this requires both holding the
+appropriate lock and checking the refcount to ensure that it
+has not already hit zero.
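+
+The pattern the fix relies on is, roughly (simplified sketch of the change
+below, omitting the rcv callback comparison):
+
+    spin_lock(&gn->sock_lock);
+    gs = geneve_find_sock(net, port);
+    /* take a reference only if the refcount has not already dropped to
+     * zero, i.e. the socket is not concurrently being torn down */
+    if (gs && !atomic_add_unless(&gs->refcnt, 1, 0))
+        gs = ERR_PTR(-EBUSY);
+    spin_unlock(&gn->sock_lock);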
+
+Inspired by a related (but not exactly the same) issue in the
+VXLAN driver.
+
+Fixes: 0b5e8b8e ("net: Add Geneve tunneling protocol driver")
+CC: Andy Zhou <azhou@nicira.com>
+Signed-off-by: Jesse Gross <jesse@nicira.com>
+Acked-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/geneve.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/geneve.c
++++ b/net/ipv4/geneve.c
+@@ -302,6 +302,7 @@ struct geneve_sock *geneve_sock_add(stru
+ geneve_rcv_t *rcv, void *data,
+ bool no_share, bool ipv6)
+ {
++ struct geneve_net *gn = net_generic(net, geneve_net_id);
+ struct geneve_sock *gs;
+
+ gs = geneve_socket_create(net, port, rcv, data, ipv6);
+@@ -311,15 +312,15 @@ struct geneve_sock *geneve_sock_add(stru
+ if (no_share) /* Return error if sharing is not allowed. */
+ return ERR_PTR(-EINVAL);
+
++ spin_lock(&gn->sock_lock);
+ gs = geneve_find_sock(net, port);
+- if (gs) {
+- if (gs->rcv == rcv)
+- atomic_inc(&gs->refcnt);
+- else
++ if (gs && ((gs->rcv != rcv) ||
++ !atomic_add_unless(&gs->refcnt, 1, 0)))
+ gs = ERR_PTR(-EBUSY);
+- } else {
++ spin_unlock(&gn->sock_lock);
++
++ if (!gs)
+ gs = ERR_PTR(-EINVAL);
+- }
+
+ return gs;
+ }
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jesse Gross <jesse@nicira.com>
+Date: Tue, 16 Dec 2014 18:25:31 -0800
+Subject: geneve: Remove socket and offload handlers at destruction.
+
+From: Jesse Gross <jesse@nicira.com>
+
+[ Upstream commit 7ed767f73192d6daf673c6d885cd02d5f280ac1f ]
+
+Sockets aren't currently removed from the global list when
+they are destroyed. In addition, offload handlers need to be cleaned
+up as well.
+
+Fixes: 0b5e8b8e ("net: Add Geneve tunneling protocol driver")
+CC: Andy Zhou <azhou@nicira.com>
+Signed-off-by: Jesse Gross <jesse@nicira.com>
+Acked-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/geneve.c | 17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/net/ipv4/geneve.c
++++ b/net/ipv4/geneve.c
+@@ -165,6 +165,15 @@ static void geneve_notify_add_rx_port(st
+ }
+ }
+
++static void geneve_notify_del_rx_port(struct geneve_sock *gs)
++{
++ struct sock *sk = gs->sock->sk;
++ sa_family_t sa_family = sk->sk_family;
++
++ if (sa_family == AF_INET)
++ udp_del_offload(&gs->udp_offloads);
++}
++
+ /* Callback from net/ipv4/udp.c to receive packets */
+ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+ {
+@@ -318,9 +327,17 @@ EXPORT_SYMBOL_GPL(geneve_sock_add);
+
+ void geneve_sock_release(struct geneve_sock *gs)
+ {
++ struct net *net = sock_net(gs->sock->sk);
++ struct geneve_net *gn = net_generic(net, geneve_net_id);
++
+ if (!atomic_dec_and_test(&gs->refcnt))
+ return;
+
++ spin_lock(&gn->sock_lock);
++ hlist_del_rcu(&gs->hlist);
++ geneve_notify_del_rx_port(gs);
++ spin_unlock(&gn->sock_lock);
++
+ queue_work(geneve_wq, &gs->del_work);
+ }
+ EXPORT_SYMBOL_GPL(geneve_sock_release);
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Mon, 15 Dec 2014 09:24:13 +0200
+Subject: gre: fix the inner mac header in nbma tunnel xmit path
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+
+[ Upstream commit 8a0033a947403569caeca45fa5e6f7ba60d51974 ]
+
+The NBMA GRE tunnels temporarily push a GRE header that contains the
+per-packet NBMA destination onto the skb via header ops early in the xmit
+path. It is later pulled before the real GRE header is constructed.
+
+The inner mac header was thus set differently in the NBMA case: the GRE
+header has been pushed by the neighbor layer, and the mac header points to
+the beginning of the temporary GRE header (set by dev_queue_xmit).
+
+Now that the offloads expect the mac header to point to the GRE payload,
+fix the xmit path to:
+ - pull the temporary GRE header away first
+ - and reset the mac header to point to the GRE payload
+
+This fixes TSO so that it works again with NBMA tunnels.
+
+Fixes: 14051f0452a2 ("gre: Use inner mac length when computing tunnel length")
+Signed-off-by: Timo Teräs <timo.teras@iki.fi>
+Cc: Tom Herbert <therbert@google.com>
+Cc: Alexander Duyck <alexander.h.duyck@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_gre.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -252,10 +252,6 @@ static netdev_tx_t ipgre_xmit(struct sk_
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct iphdr *tnl_params;
+
+- skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
+- if (IS_ERR(skb))
+- goto out;
+-
+ if (dev->header_ops) {
+ /* Need space for new headers */
+ if (skb_cow_head(skb, dev->needed_headroom -
+@@ -268,6 +264,7 @@ static netdev_tx_t ipgre_xmit(struct sk_
+ * to gre header.
+ */
+ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
++ skb_reset_mac_header(skb);
+ } else {
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto free_skb;
+@@ -275,6 +272,10 @@ static netdev_tx_t ipgre_xmit(struct sk_
+ tnl_params = &tunnel->parms.iph;
+ }
+
++ skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
++ if (IS_ERR(skb))
++ goto out;
++
+ __gre_xmit(skb, dev, tnl_params, skb->protocol);
+
+ return NETDEV_TX_OK;
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: stephen hemminger <stephen@networkplumber.org>
+Date: Sat, 20 Dec 2014 12:15:49 -0800
+Subject: in6: fix conflict with glibc
+
+From: stephen hemminger <stephen@networkplumber.org>
+
+[ Upstream commit 6d08acd2d32e3e877579315dc3202d7a5f336d98 ]
+
+Resolve conflicts between the glibc definitions of IPV6 socket options
+and those defined in Linux headers. Looks like earlier efforts to
+solve this did not cover all the definitions.
+
+It resolves warnings during iproute2 build.
+Please consider for stable as well.
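+
+For example (illustrative), a userspace build that includes the glibc header
+first should now compile cleanly:
+
+    #include <netinet/in.h>  /* glibc defines IPV6_ADDRFORM and friends;
+                              * libc-compat.h then sets
+                              * __UAPI_DEF_IPV6_OPTIONS to 0 */
+    #include <linux/in6.h>   /* the kernel copies of the IPV6_* options are
+                              * skipped, so no redefinition warnings */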
+
+Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/in6.h | 3 ++-
+ include/uapi/linux/libc-compat.h | 3 +++
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/include/uapi/linux/in6.h
++++ b/include/uapi/linux/in6.h
+@@ -149,7 +149,7 @@ struct in6_flowlabel_req {
+ /*
+ * IPV6 socket options
+ */
+-
++#if __UAPI_DEF_IPV6_OPTIONS
+ #define IPV6_ADDRFORM 1
+ #define IPV6_2292PKTINFO 2
+ #define IPV6_2292HOPOPTS 3
+@@ -196,6 +196,7 @@ struct in6_flowlabel_req {
+
+ #define IPV6_IPSEC_POLICY 34
+ #define IPV6_XFRM_POLICY 35
++#endif
+
+ /*
+ * Multicast:
+--- a/include/uapi/linux/libc-compat.h
++++ b/include/uapi/linux/libc-compat.h
+@@ -69,6 +69,7 @@
+ #define __UAPI_DEF_SOCKADDR_IN6 0
+ #define __UAPI_DEF_IPV6_MREQ 0
+ #define __UAPI_DEF_IPPROTO_V6 0
++#define __UAPI_DEF_IPV6_OPTIONS 0
+
+ #else
+
+@@ -82,6 +83,7 @@
+ #define __UAPI_DEF_SOCKADDR_IN6 1
+ #define __UAPI_DEF_IPV6_MREQ 1
+ #define __UAPI_DEF_IPPROTO_V6 1
++#define __UAPI_DEF_IPV6_OPTIONS 1
+
+ #endif /* _NETINET_IN_H */
+
+@@ -103,6 +105,7 @@
+ #define __UAPI_DEF_SOCKADDR_IN6 1
+ #define __UAPI_DEF_IPV6_MREQ 1
+ #define __UAPI_DEF_IPPROTO_V6 1
++#define __UAPI_DEF_IPV6_OPTIONS 1
+
+ /* Definitions for xattr.h */
+ #define __UAPI_DEF_XATTR 1
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jay Vosburgh <jay.vosburgh@canonical.com>
+Date: Fri, 19 Dec 2014 15:32:00 -0800
+Subject: net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding
+
+From: Jay Vosburgh <jay.vosburgh@canonical.com>
+
+[ Upstream commit 2c26d34bbcc0b3f30385d5587aa232289e2eed8e ]
+
+When using VXLAN tunnels and a sky2 device, I have experienced
+checksum failures of the following type:
+
+[ 4297.761899] eth0: hw csum failure
+[...]
+[ 4297.765223] Call Trace:
+[ 4297.765224] <IRQ> [<ffffffff8172f026>] dump_stack+0x46/0x58
+[ 4297.765235] [<ffffffff8162ba52>] netdev_rx_csum_fault+0x42/0x50
+[ 4297.765238] [<ffffffff8161c1a0>] ? skb_push+0x40/0x40
+[ 4297.765240] [<ffffffff8162325c>] __skb_checksum_complete+0xbc/0xd0
+[ 4297.765243] [<ffffffff8168c602>] tcp_v4_rcv+0x2e2/0x950
+[ 4297.765246] [<ffffffff81666ca0>] ? ip_rcv_finish+0x360/0x360
+
+ These are reliably reproduced in a network topology of:
+
+container:eth0 == host(OVS VXLAN on VLAN) == bond0 == eth0 (sky2) -> switch
+
+ When VXLAN encapsulated traffic is received from a similarly
+configured peer, the above warning is generated in the receive
+processing of the encapsulated packet. Note that the warning is
+associated with the container eth0.
+
+ The skbs from sky2 have ip_summed set to CHECKSUM_COMPLETE, and
+because the packet is an encapsulated Ethernet frame, the checksum
+generated by the hardware includes the inner protocol and Ethernet
+headers.
+
+ The receive code is careful to update the skb->csum, except in
+__dev_forward_skb, as called by dev_forward_skb. __dev_forward_skb
+calls eth_type_trans, which in turn calls skb_pull_inline(skb, ETH_HLEN)
+to skip over the Ethernet header, but does not update skb->csum when
+doing so.
+
+ This patch resolves the problem by adding a call to
+skb_postpull_rcsum to update the skb->csum after the call to
+eth_type_trans.
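+
+The pattern being applied is, roughly (matching the one-line change below):
+
+    skb->protocol = eth_type_trans(skb, dev);        /* pulls ETH_HLEN */
+    skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); /* subtract the pulled
+                                                      * bytes from skb->csum
+                                                      * for CHECKSUM_COMPLETE */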
+
+Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1697,6 +1697,7 @@ int __dev_forward_skb(struct net_device
+
+ skb_scrub_packet(skb, true);
+ skb->protocol = eth_type_trans(skb, dev);
++ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+ return 0;
+ }
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jason Wang <jasowang@redhat.com>
+Date: Fri, 19 Dec 2014 11:09:13 +0800
+Subject: net: drop the packet when fails to do software segmentation or header check
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit af6dabc9c70ae3f307685b1f32f52d60b1bf0527 ]
+
+Commit cecda693a969816bac5e470e1d9c9c0ef5567bca ("net: keep original skb
+which only needs header checking during software GSO") keeps the original
+skb for packets that only need a header check, but it doesn't drop the
+packet if software segmentation or the header check fails.
+
+Fixes: cecda693a9 ("net: keep original skb which only needs header checking during software GSO")
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2680,7 +2680,7 @@ static struct sk_buff *validate_xmit_skb
+
+ segs = skb_gso_segment(skb, features);
+ if (IS_ERR(segs)) {
+- segs = NULL;
++ goto out_kfree_skb;
+ } else if (segs) {
+ consume_skb(skb);
+ skb = segs;
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Mon, 22 Dec 2014 19:04:14 +0900
+Subject: net: Fix stacked vlan offload features computation
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 796f2da81bead71ffc91ef70912cd8d1827bf756 ]
+
+When vlan tags are stacked, it is very likely that the outer tag is stored
+in skb->vlan_tci and skb->protocol shows the inner tag's vlan_proto.
+Currently netif_skb_features() first looks at skb->protocol even when the
+outer tag is present in vlan_tci, so it incorrectly retrieves the protocol
+encapsulated by the inner vlan instead of the inner vlan protocol.
+This allows GSO packets to be passed to the HW, and they end up being
+corrupted.
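+
+A simplified sketch of the corrected ordering (mirroring the change below,
+harmonize_features() handling omitted):
+
+    if (vlan_tx_tag_present(skb)) {
+        /* outer tag is accelerated: skb->protocol already holds the inner
+         * tag's vlan_proto, so do not unwrap another header from skb->data */
+    } else if (protocol == htons(ETH_P_8021Q) ||
+               protocol == htons(ETH_P_8021AD)) {
+        /* no accelerated tag: the vlan header still sits in skb->data */
+        protocol = ((struct vlan_ethhdr *)skb->data)->h_vlan_encapsulated_proto;
+    }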
+
+Fixes: 58e998c6d239 ("offloading: Force software GSO for multiple vlan tags.")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2573,11 +2573,14 @@ netdev_features_t netif_skb_features(str
+ if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
+ features &= ~NETIF_F_GSO_MASK;
+
+- if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
+- struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+- protocol = veh->h_vlan_encapsulated_proto;
+- } else if (!vlan_tx_tag_present(skb)) {
+- return harmonize_features(skb, features);
++ if (!vlan_tx_tag_present(skb)) {
++ if (unlikely(protocol == htons(ETH_P_8021Q) ||
++ protocol == htons(ETH_P_8021AD))) {
++ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
++ protocol = veh->h_vlan_encapsulated_proto;
++ } else {
++ return harmonize_features(skb, features);
++ }
+ }
+
+ features = netdev_intersect_features(features,
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jesse Gross <jesse@nicira.com>
+Date: Tue, 23 Dec 2014 22:37:26 -0800
+Subject: net: Generalize ndo_gso_check to ndo_features_check
+
+From: Jesse Gross <jesse@nicira.com>
+
+[ Upstream commit 5f35227ea34bb616c436d9da47fc325866c428f3 ]
+
+GSO isn't the only offload feature with restrictions that
+potentially can't be expressed with the current features mechanism.
+Checksum is another although it's a general issue that could in
+theory apply to anything. Even if it may be possible to
+implement these restrictions in other ways, it can result in
+duplicate code or inefficient per-packet behavior.
+
+This generalizes ndo_gso_check so that drivers can remove any
+features that don't make sense for a given packet, similar to
+netif_skb_features(). It also converts existing driver
+restrictions to the new format, completing the work that was
+done to support tunnel protocols since the issues apply to
+checksums as well.
+
+By actually removing features from the set that are used to do
+offloading, it solves another problem with the existing
+interface. In these cases, GSO would run with the original set
+of features and not do anything because it appears that
+segmentation is not required.
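+
+A hypothetical driver callback following the new interface could look like
+this (foo_hw_can_offload() is a made-up helper, not part of this patch):
+
+    static netdev_features_t foo_features_check(struct sk_buff *skb,
+                                                struct net_device *dev,
+                                                netdev_features_t features)
+    {
+        /* drop the offload bits the hardware cannot honour for this skb */
+        if (skb->encapsulation && !foo_hw_can_offload(skb))
+            features &= ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+        return features;
+    }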
+
+CC: Tom Herbert <therbert@google.com>
+CC: Joe Stringer <joestringer@nicira.com>
+CC: Eric Dumazet <edumazet@google.com>
+CC: Hayes Wang <hayeswang@realtek.com>
+Signed-off-by: Jesse Gross <jesse@nicira.com>
+Acked-by: Tom Herbert <therbert@google.com>
+Fixes: 04ffcb255f22 ("net: Add ndo_gso_check")
+Tested-by: Hayes Wang <hayeswang@realtek.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/emulex/benet/be_main.c | 8 ++++--
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 10 ++++----
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 ++++--
+ include/linux/netdevice.h | 20 +++++++++-------
+ include/net/vxlan.h | 28 +++++++++++++++++++----
+ net/core/dev.c | 23 +++++++++++-------
+ 6 files changed, 65 insertions(+), 32 deletions(-)
+
+--- a/drivers/net/ethernet/emulex/benet/be_main.c
++++ b/drivers/net/ethernet/emulex/benet/be_main.c
+@@ -4427,9 +4427,11 @@ static void be_del_vxlan_port(struct net
+ be16_to_cpu(port));
+ }
+
+-static bool be_gso_check(struct sk_buff *skb, struct net_device *dev)
++static netdev_features_t be_features_check(struct sk_buff *skb,
++ struct net_device *dev,
++ netdev_features_t features)
+ {
+- return vxlan_gso_check(skb);
++ return vxlan_features_check(skb, features);
+ }
+ #endif
+
+@@ -4460,7 +4462,7 @@ static const struct net_device_ops be_ne
+ #ifdef CONFIG_BE2NET_VXLAN
+ .ndo_add_vxlan_port = be_add_vxlan_port,
+ .ndo_del_vxlan_port = be_del_vxlan_port,
+- .ndo_gso_check = be_gso_check,
++ .ndo_features_check = be_features_check,
+ #endif
+ };
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -2363,9 +2363,11 @@ static void mlx4_en_del_vxlan_port(struc
+ queue_work(priv->mdev->workqueue, &priv->vxlan_del_task);
+ }
+
+-static bool mlx4_en_gso_check(struct sk_buff *skb, struct net_device *dev)
++static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
++ struct net_device *dev,
++ netdev_features_t features)
+ {
+- return vxlan_gso_check(skb);
++ return vxlan_features_check(skb, features);
+ }
+ #endif
+
+@@ -2398,7 +2400,7 @@ static const struct net_device_ops mlx4_
+ #ifdef CONFIG_MLX4_EN_VXLAN
+ .ndo_add_vxlan_port = mlx4_en_add_vxlan_port,
+ .ndo_del_vxlan_port = mlx4_en_del_vxlan_port,
+- .ndo_gso_check = mlx4_en_gso_check,
++ .ndo_features_check = mlx4_en_features_check,
+ #endif
+ };
+
+@@ -2432,7 +2434,7 @@ static const struct net_device_ops mlx4_
+ #ifdef CONFIG_MLX4_EN_VXLAN
+ .ndo_add_vxlan_port = mlx4_en_add_vxlan_port,
+ .ndo_del_vxlan_port = mlx4_en_del_vxlan_port,
+- .ndo_gso_check = mlx4_en_gso_check,
++ .ndo_features_check = mlx4_en_features_check,
+ #endif
+ };
+
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+@@ -504,9 +504,11 @@ static void qlcnic_del_vxlan_port(struct
+ adapter->flags |= QLCNIC_DEL_VXLAN_PORT;
+ }
+
+-static bool qlcnic_gso_check(struct sk_buff *skb, struct net_device *dev)
++static netdev_features_t qlcnic_features_check(struct sk_buff *skb,
++ struct net_device *dev,
++ netdev_features_t features)
+ {
+- return vxlan_gso_check(skb);
++ return vxlan_features_check(skb, features);
+ }
+ #endif
+
+@@ -531,7 +533,7 @@ static const struct net_device_ops qlcni
+ #ifdef CONFIG_QLCNIC_VXLAN
+ .ndo_add_vxlan_port = qlcnic_add_vxlan_port,
+ .ndo_del_vxlan_port = qlcnic_del_vxlan_port,
+- .ndo_gso_check = qlcnic_gso_check,
++ .ndo_features_check = qlcnic_features_check,
+ #endif
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = qlcnic_poll_controller,
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -998,12 +998,15 @@ typedef u16 (*select_queue_fallback_t)(s
+ * Callback to use for xmit over the accelerated station. This
+ * is used in place of ndo_start_xmit on accelerated net
+ * devices.
+- * bool (*ndo_gso_check) (struct sk_buff *skb,
+- * struct net_device *dev);
++ * netdev_features_t (*ndo_features_check) (struct sk_buff *skb,
++ * struct net_device *dev
++ * netdev_features_t features);
+ * Called by core transmit path to determine if device is capable of
+- * performing GSO on a packet. The device returns true if it is
+- * able to GSO the packet, false otherwise. If the return value is
+- * false the stack will do software GSO.
++ * performing offload operations on a given packet. This is to give
++ * the device an opportunity to implement any restrictions that cannot
++ * be otherwise expressed by feature flags. The check is called with
++ * the set of features that the stack has calculated and it returns
++ * those the driver believes to be appropriate.
+ */
+ struct net_device_ops {
+ int (*ndo_init)(struct net_device *dev);
+@@ -1153,8 +1156,9 @@ struct net_device_ops {
+ struct net_device *dev,
+ void *priv);
+ int (*ndo_get_lock_subclass)(struct net_device *dev);
+- bool (*ndo_gso_check) (struct sk_buff *skb,
+- struct net_device *dev);
++ netdev_features_t (*ndo_features_check) (struct sk_buff *skb,
++ struct net_device *dev,
++ netdev_features_t features);
+ };
+
+ /**
+@@ -3584,8 +3588,6 @@ static inline bool netif_needs_gso(struc
+ netdev_features_t features)
+ {
+ return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
+- (dev->netdev_ops->ndo_gso_check &&
+- !dev->netdev_ops->ndo_gso_check(skb, dev)) ||
+ unlikely((skb->ip_summed != CHECKSUM_PARTIAL) &&
+ (skb->ip_summed != CHECKSUM_UNNECESSARY)));
+ }
+--- a/include/net/vxlan.h
++++ b/include/net/vxlan.h
+@@ -1,6 +1,9 @@
+ #ifndef __NET_VXLAN_H
+ #define __NET_VXLAN_H 1
+
++#include <linux/ip.h>
++#include <linux/ipv6.h>
++#include <linux/if_vlan.h>
+ #include <linux/skbuff.h>
+ #include <linux/netdevice.h>
+ #include <linux/udp.h>
+@@ -51,16 +54,33 @@ int vxlan_xmit_skb(struct vxlan_sock *vs
+ __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
+ __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
+
+-static inline bool vxlan_gso_check(struct sk_buff *skb)
++static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
++ netdev_features_t features)
+ {
+- if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) &&
++ u8 l4_hdr = 0;
++
++ if (!skb->encapsulation)
++ return features;
++
++ switch (vlan_get_protocol(skb)) {
++ case htons(ETH_P_IP):
++ l4_hdr = ip_hdr(skb)->protocol;
++ break;
++ case htons(ETH_P_IPV6):
++ l4_hdr = ipv6_hdr(skb)->nexthdr;
++ break;
++ default:
++ return features;;
++ }
++
++ if ((l4_hdr == IPPROTO_UDP) &&
+ (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+ skb->inner_protocol != htons(ETH_P_TEB) ||
+ (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
+ sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
+- return false;
++ return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+
+- return true;
++ return features;
+ }
+
+ /* IP header + UDP + VXLAN + Ethernet header */
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2566,7 +2566,7 @@ static netdev_features_t harmonize_featu
+
+ netdev_features_t netif_skb_features(struct sk_buff *skb)
+ {
+- const struct net_device *dev = skb->dev;
++ struct net_device *dev = skb->dev;
+ netdev_features_t features = dev->features;
+ u16 gso_segs = skb_shinfo(skb)->gso_segs;
+ __be16 protocol = skb->protocol;
+@@ -2574,13 +2574,20 @@ netdev_features_t netif_skb_features(str
+ if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
+ features &= ~NETIF_F_GSO_MASK;
+
++ /* If encapsulation offload request, verify we are testing
++ * hardware encapsulation features instead of standard
++ * features for the netdev
++ */
++ if (skb->encapsulation)
++ features &= dev->hw_enc_features;
++
+ if (!vlan_tx_tag_present(skb)) {
+ if (unlikely(protocol == htons(ETH_P_8021Q) ||
+ protocol == htons(ETH_P_8021AD))) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else {
+- return harmonize_features(skb, features);
++ goto finalize;
+ }
+ }
+
+@@ -2598,6 +2605,11 @@ netdev_features_t netif_skb_features(str
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
+
++finalize:
++ if (dev->netdev_ops->ndo_features_check)
++ features &= dev->netdev_ops->ndo_features_check(skb, dev,
++ features);
++
+ return harmonize_features(skb, features);
+ }
+ EXPORT_SYMBOL(netif_skb_features);
+@@ -2672,13 +2684,6 @@ static struct sk_buff *validate_xmit_skb
+ if (unlikely(!skb))
+ goto out_null;
+
+- /* If encapsulation offload request, verify we are testing
+- * hardware encapsulation features instead of standard
+- * features for the netdev
+- */
+- if (skb->encapsulation)
+- features &= dev->hw_enc_features;
+-
+ if (netif_needs_gso(dev, skb, features)) {
+ struct sk_buff *segs;
+
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Ido Shamay <idos@mellanox.com>
+Date: Tue, 16 Dec 2014 13:28:54 +0200
+Subject: net/mlx4: Cache line CQE/EQE stride fixes
+
+From: Ido Shamay <idos@mellanox.com>
+
+[ Upstream commit c3f2511feac088030055012cc8f64ebd84c87dbc ]
+
+This commit contains 2 fixes for the 128B CQE/EQE stride feature.
+Wei found that the mlx4_QUERY_HCA function marked the wrong capability
+in flags (64B CQE/EQE) when the CQE/EQE stride feature was enabled.
+Also added a small fix in the initial CQE ownership bit assignment, when
+the CQE size is not the default 32B.
+
+Fixes: 77507aa24 (net/mlx4: Enable CQE/EQE stride support)
+Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
+Signed-off-by: Ido Shamay <idos@mellanox.com>
+Signed-off-by: Amir Vadai <amirv@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 11 +++++++++--
+ drivers/net/ethernet/mellanox/mlx4/fw.c | 4 ++--
+ 2 files changed, 11 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -1569,8 +1569,15 @@ int mlx4_en_start_port(struct net_device
+ mlx4_en_free_affinity_hint(priv, i);
+ goto cq_err;
+ }
+- for (j = 0; j < cq->size; j++)
+- cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK;
++
++ for (j = 0; j < cq->size; j++) {
++ struct mlx4_cqe *cqe = NULL;
++
++ cqe = mlx4_en_get_cqe(cq->buf, j, priv->cqe_size) +
++ priv->cqe_factor;
++ cqe->owner_sr_opcode = MLX4_CQE_OWNER_MASK;
++ }
++
+ err = mlx4_en_set_cq_moder(priv, cq);
+ if (err) {
+ en_err(priv, "Failed setting cq moderation parameters\n");
+--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
++++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
+@@ -1647,8 +1647,8 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+ /* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
+ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+ if (byte_field) {
+- param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+- param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
++ param->dev_cap_enabled |= MLX4_DEV_CAP_EQE_STRIDE_ENABLED;
++ param->dev_cap_enabled |= MLX4_DEV_CAP_CQE_STRIDE_ENABLED;
+ param->cqe_size = 1 << ((byte_field &
+ MLX4_CQE_SIZE_MASK_STRIDE) + 5);
+ param->eqe_size = 1 << (((byte_field &
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Maor Gottlieb <maorg@mellanox.com>
+Date: Tue, 30 Dec 2014 11:59:49 +0200
+Subject: net/mlx4_core: Correctly update the mtt's offset in the MR re-reg flow
+
+From: Maor Gottlieb <maorg@mellanox.com>
+
+[ Upstream commit a51e0df4c1e06afd7aba84496c14238e6b363caa ]
+
+Previously, mlx4_mr_rereg_mem_write filled the MPT's entity_size with the
+old MTT's page shift, which could result in using an incorrect offset.
+Fix the initialization to be after we calculate the new MTT offset.
+
+In addition, assign mtt order to -1 after calling mlx4_mtt_cleanup. This
+is necessary in order to mark the MTT as invalid and avoid freeing it later.
+
+Fixes: e630664 ('mlx4_core: Add helper functions to support MR re-registration')
+Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
+Signed-off-by: Matan Barak <matanb@mellanox.com>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/mr.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
++++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
+@@ -590,6 +590,7 @@ EXPORT_SYMBOL_GPL(mlx4_mr_free);
+ void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
+ {
+ mlx4_mtt_cleanup(dev, &mr->mtt);
++ mr->mtt.order = -1;
+ }
+ EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);
+
+@@ -599,14 +600,14 @@ int mlx4_mr_rereg_mem_write(struct mlx4_
+ {
+ int err;
+
+- mpt_entry->start = cpu_to_be64(iova);
+- mpt_entry->length = cpu_to_be64(size);
+- mpt_entry->entity_size = cpu_to_be32(page_shift);
+-
+ err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+ if (err)
+ return err;
+
++ mpt_entry->start = cpu_to_be64(mr->iova);
++ mpt_entry->length = cpu_to_be64(mr->size);
++ mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
++
+ mpt_entry->pd_flags &= cpu_to_be32(MLX4_MPT_PD_MASK |
+ MLX4_MPT_PD_FLAG_EN_INV);
+ mpt_entry->flags &= cpu_to_be32(MLX4_MPT_FLAG_FREE |
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Amir Vadai <amirv@mellanox.com>
+Date: Mon, 22 Dec 2014 10:21:57 +0200
+Subject: net/mlx4_en: Doorbell is byteswapped in Little Endian archs
+
+From: Amir Vadai <amirv@mellanox.com>
+
+[ Upstream commit 492f5add4be84652bbe13da8a250d60c6856a5c5 ]
+
+iowrite32() will byteswap its argument on big endian archs.
+iowrite32be() will byteswap on little endian archs.
+Since we don't want to do this unnecessary byteswap on the fast path,
+the doorbell is stored in the NIC's native endianness. Use the right
+iowrite() according to the arch endianness.
+
+CC: Wei Yang <weiyang@linux.vnet.ibm.com>
+CC: David Laight <david.laight@aculab.com>
+Fixes: 6a4e812 ("net/mlx4_en: Avoid calling bswap in tx fast path")
+Signed-off-by: Amir Vadai <amirv@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_tx.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+@@ -954,7 +954,17 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff
+ tx_desc->ctrl.owner_opcode = op_own;
+ if (send_doorbell) {
+ wmb();
+- iowrite32(ring->doorbell_qpn,
++ /* Since there is no iowrite*_native() that writes the
++ * value as is, without byteswapping - using the one
++ * the doesn't do byteswapping in the relevant arch
++ * endianness.
++ */
++#if defined(__LITTLE_ENDIAN)
++ iowrite32(
++#else
++ iowrite32be(
++#endif
++ ring->doorbell_qpn,
+ ring->bf.uar->map + MLX4_SEND_DOORBELL);
+ } else {
+ ring->xmit_more++;
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Thomas Graf <tgraf@suug.ch>
+Date: Tue, 23 Dec 2014 01:13:18 +0100
+Subject: net: Reset secmark when scrubbing packet
+
+From: Thomas Graf <tgraf@suug.ch>
+
+[ Upstream commit b8fb4e0648a2ab3734140342002f68fb0c7d1602 ]
+
+skb_scrub_packet() is called when a packet switches between a context
+such as between underlay and overlay, between namespaces, or between
+L3 subnets.
+
+While we already scrub the packet mark, connection tracking entry,
+and cached destination, the security mark/context is left intact.
+
+It seems wrong to inherit the security context of a packet when going
+from overlay to underlay or across forwarding paths.
+
+Signed-off-by: Thomas Graf <tgraf@suug.ch>
+Acked-by: Flavio Leitner <fbl@sysclose.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4040,6 +4040,7 @@ void skb_scrub_packet(struct sk_buff *sk
+ skb->ignore_df = 0;
+ skb_dst_drop(skb);
+ skb->mark = 0;
++ skb_init_secmark(skb);
+ secpath_reset(skb);
+ nf_reset(skb);
+ nf_reset_trace(skb);
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: David Miller <davem@davemloft.net>
+Date: Tue, 16 Dec 2014 17:58:17 -0500
+Subject: netlink: Always copy on mmap TX.
+
+From: David Miller <davem@davemloft.net>
+
+[ Upstream commit 4682a0358639b29cf69437ed909c6221f8c89847 ]
+
+Checking the file f_count and the nlk->mapped count is not completely
+sufficient to prevent the mmap'd area contents from changing from
+under us during netlink mmap sendmsg() operations.
+
+Be careful to sample the header's length field only once, because this
+could change from under us as well.
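+
+The core of the fix is the "read the untrusted length exactly once" pattern
+(sketch, matching the hunk below):
+
+    unsigned int nm_len = ACCESS_ONCE(hdr->nm_len);  /* sample once */
+
+    if (nm_len > maxlen)
+        return -EINVAL;
+    /* every later copy and skb sizing uses nm_len, never hdr->nm_len, so
+     * userspace cannot grow the frame under us after the check */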
+
+Fixes: 5fd96123ee19 ("netlink: implement memory mapped sendmsg()")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Daniel Borkmann <dborkman@redhat.com>
+Acked-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c | 52 ++++++++++++++---------------------------------
+ 1 file changed, 16 insertions(+), 36 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -526,14 +526,14 @@ out:
+ return err;
+ }
+
+-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
++static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
+ {
+ #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+ struct page *p_start, *p_end;
+
+ /* First page is flushed through netlink_{get,set}_status */
+ p_start = pgvec_to_page(hdr + PAGE_SIZE);
+- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
++ p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
+ while (p_start <= p_end) {
+ flush_dcache_page(p_start);
+ p_start++;
+@@ -715,24 +715,16 @@ static int netlink_mmap_sendmsg(struct s
+ struct nl_mmap_hdr *hdr;
+ struct sk_buff *skb;
+ unsigned int maxlen;
+- bool excl = true;
+ int err = 0, len = 0;
+
+- /* Netlink messages are validated by the receiver before processing.
+- * In order to avoid userspace changing the contents of the message
+- * after validation, the socket and the ring may only be used by a
+- * single process, otherwise we fall back to copying.
+- */
+- if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 ||
+- atomic_read(&nlk->mapped) > 1)
+- excl = false;
+-
+ mutex_lock(&nlk->pg_vec_lock);
+
+ ring = &nlk->tx_ring;
+ maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+
+ do {
++ unsigned int nm_len;
++
+ hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
+ if (hdr == NULL) {
+ if (!(msg->msg_flags & MSG_DONTWAIT) &&
+@@ -740,35 +732,23 @@ static int netlink_mmap_sendmsg(struct s
+ schedule();
+ continue;
+ }
+- if (hdr->nm_len > maxlen) {
++
++ nm_len = ACCESS_ONCE(hdr->nm_len);
++ if (nm_len > maxlen) {
+ err = -EINVAL;
+ goto out;
+ }
+
+- netlink_frame_flush_dcache(hdr);
++ netlink_frame_flush_dcache(hdr, nm_len);
+
+- if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
+- skb = alloc_skb_head(GFP_KERNEL);
+- if (skb == NULL) {
+- err = -ENOBUFS;
+- goto out;
+- }
+- sock_hold(sk);
+- netlink_ring_setup_skb(skb, sk, ring, hdr);
+- NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
+- __skb_put(skb, hdr->nm_len);
+- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+- atomic_inc(&ring->pending);
+- } else {
+- skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
+- if (skb == NULL) {
+- err = -ENOBUFS;
+- goto out;
+- }
+- __skb_put(skb, hdr->nm_len);
+- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
+- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
++ skb = alloc_skb(nm_len, GFP_KERNEL);
++ if (skb == NULL) {
++ err = -ENOBUFS;
++ goto out;
+ }
++ __skb_put(skb, nm_len);
++ memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
++ netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+
+ netlink_increment_head(ring);
+
+@@ -814,7 +794,7 @@ static void netlink_queue_mmaped_skb(str
+ hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+ hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+ hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+- netlink_frame_flush_dcache(hdr);
++ netlink_frame_flush_dcache(hdr, hdr->nm_len);
+ netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+
+ NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Thomas Graf <tgraf@suug.ch>
+Date: Thu, 18 Dec 2014 10:30:26 +0000
+Subject: netlink: Don't reorder loads/stores before marking mmap netlink frame as available
+
+From: Thomas Graf <tgraf@suug.ch>
+
+[ Upstream commit a18e6a186f53af06937a2c268c72443336f4ab56 ]
+
+Each mmap Netlink frame contains a status field which indicates
+whether the frame is unused, reserved, contains data or needs to
+be skipped. Both loads and stores may not be reordered and must
+complete before the status field is changed and another CPU might
+pick up the frame for use. Use an smp_mb() to cover the needs of both
+types of callers to netlink_set_status(), callers which have been
+reading data frame from the frame, and callers which have been
+filling or releasing and thus writing to the frame.
+
+- Example code path requiring a smp_rmb():
+ memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
+ netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+
+- Example code path requiring a smp_wmb():
+ hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+ hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+ netlink_frame_flush_dcache(hdr);
+ netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
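+
+The resulting helper then orders everything before the status store
+(essentially the hunk below):
+
+    static void netlink_set_status(struct nl_mmap_hdr *hdr,
+                                   enum nl_mmap_status status)
+    {
+        smp_mb();    /* complete all prior loads and stores before the
+                      * frame is handed over via nm_status */
+        hdr->nm_status = status;
+        flush_dcache_page(pgvec_to_page(hdr));
+    }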
+
+Fixes: f9c228 ("netlink: implement memory mapped recvmsg()")
+Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -551,9 +551,9 @@ static enum nl_mmap_status netlink_get_s
+ static void netlink_set_status(struct nl_mmap_hdr *hdr,
+ enum nl_mmap_status status)
+ {
++ smp_mb();
+ hdr->nm_status = status;
+ flush_dcache_page(pgvec_to_page(hdr));
+- smp_wmb();
+ }
+
+ static struct nl_mmap_hdr *
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Thu, 1 Jan 2015 00:39:23 +1100
+Subject: tcp: Do not apply TSO segment limit to non-TSO packets
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit 843925f33fcc293d80acf2c5c8a78adf3344d49b ]
+
+Thomas Jarosch reported IPsec TCP stalls when a PMTU event occurs.
+
+In fact the problem was completely unrelated to IPsec. The bug is
+also reproducible if you just disable TSO/GSO.
+
+The problem is that when the MSS goes down, existing queued packets
+on the TX queue that have not been transmitted yet all look like
+TSO packets and get treated as such.
+
+This then triggers a bug where tcp_mss_split_point tells us to
+generate a zero-sized packet on the TX queue. Once that happens
+we're screwed because the zero-sized packet can never be removed
+by ACKs.
+
+Fixes: 1485348d242 ("tcp: Apply device TSO segment limit earlier")
+Reported-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+
+Cheers,
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1984,7 +1984,7 @@ static bool tcp_write_xmit(struct sock *
+ if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+ break;
+
+- if (tso_segs == 1) {
++ if (tso_segs == 1 || !sk->sk_gso_max_segs) {
+ if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
+ (tcp_skb_is_last(sk, skb) ?
+ nonagle : TCP_NAGLE_PUSH))))
+@@ -2020,7 +2020,7 @@ static bool tcp_write_xmit(struct sock *
+ }
+
+ limit = mss_now;
+- if (tso_segs > 1 && !tcp_urg_mode(tp))
++ if (tso_segs > 1 && sk->sk_gso_max_segs && !tcp_urg_mode(tp))
+ limit = tcp_mss_split_point(sk, skb, mss_now,
+ min_t(unsigned int,
+ cwnd_quota,
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Mon, 22 Dec 2014 18:22:48 +0100
+Subject: tcp6: don't move IP6CB before xfrm6_policy_check()
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 2dc49d1680b534877fd20cce52557ea542bb06b6 ]
+
+When xfrm6_policy_check() is used, _decode_session6() is called after some
+intermediate functions. This function uses IP6CB(), thus TCP_SKB_CB() must
+only be prepared after the call to xfrm6_policy_check().
+
+Before this patch, scenarios with IPv6 + TCP + IPsec Transport are broken.
+
+Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
+Reported-by: Huaibin Wang <huaibin.wang@6wind.com>
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/tcp_ipv6.c | 45 +++++++++++++++++++++++++++++----------------
+ 1 file changed, 29 insertions(+), 16 deletions(-)
+
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1385,6 +1385,28 @@ ipv6_pktoptions:
+ return 0;
+ }
+
++static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
++ const struct tcphdr *th)
++{
++ /* This is tricky: we move IP6CB at its correct location into
++ * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
++ * _decode_session6() uses IP6CB().
++ * barrier() makes sure compiler won't play aliasing games.
++ */
++ memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
++ sizeof(struct inet6_skb_parm));
++ barrier();
++
++ TCP_SKB_CB(skb)->seq = ntohl(th->seq);
++ TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
++ skb->len - th->doff*4);
++ TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
++ TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
++ TCP_SKB_CB(skb)->tcp_tw_isn = 0;
++ TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
++ TCP_SKB_CB(skb)->sacked = 0;
++}
++
+ static int tcp_v6_rcv(struct sk_buff *skb)
+ {
+ const struct tcphdr *th;
+@@ -1416,24 +1438,9 @@ static int tcp_v6_rcv(struct sk_buff *sk
+
+ th = tcp_hdr(skb);
+ hdr = ipv6_hdr(skb);
+- /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+- * barrier() makes sure compiler wont play fool^Waliasing games.
+- */
+- memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
+- sizeof(struct inet6_skb_parm));
+- barrier();
+-
+- TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+- TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+- skb->len - th->doff*4);
+- TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+- TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+- TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
+- TCP_SKB_CB(skb)->sacked = 0;
+
+ sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
+- tcp_v6_iif(skb));
++ inet6_iif(skb));
+ if (!sk)
+ goto no_tcp_socket;
+
+@@ -1449,6 +1456,8 @@ process:
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ goto discard_and_relse;
+
++ tcp_v6_fill_cb(skb, hdr, th);
++
+ #ifdef CONFIG_TCP_MD5SIG
+ if (tcp_v6_inbound_md5_hash(sk, skb))
+ goto discard_and_relse;
+@@ -1480,6 +1489,8 @@ no_tcp_socket:
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto discard_it;
+
++ tcp_v6_fill_cb(skb, hdr, th);
++
+ if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ csum_error:
+ TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
+@@ -1503,6 +1514,8 @@ do_time_wait:
+ goto discard_it;
+ }
+
++ tcp_v6_fill_cb(skb, hdr, th);
++
+ if (skb->len < (th->doff<<2)) {
+ inet_twsk_put(inet_twsk(sk));
+ goto bad_packet;
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Jiri Pirko <jiri@resnulli.us>
+Date: Wed, 14 Jan 2015 18:15:30 +0100
+Subject: team: avoid possible underflow of count_pending value for notify_peers and mcast_rejoin
+
+From: Jiri Pirko <jiri@resnulli.us>
+
+[ Upstream commit b0d11b42785b70e19bc6a3122eead3f7969a7589 ]
+
+This patch fixes a race condition that may cause count_pending to be set
+to -1, which results in an unwanted big bulk of ARP messages
+(in the "notify peers" case).
+
+Consider following scenario:
+
+count_pending == 2
+   CPU0                                     CPU1
+                                         team_notify_peers_work
+                                           atomic_dec_and_test (dec count_pending to 1)
+                                           schedule_delayed_work
+team_notify_peers
+  atomic_add (adding 1 to count_pending)
+                                         team_notify_peers_work
+                                           atomic_dec_and_test (dec count_pending to 1)
+                                           schedule_delayed_work
+                                         team_notify_peers_work
+                                           atomic_dec_and_test (dec count_pending to 0)
+                                           schedule_delayed_work
+                                         team_notify_peers_work
+                                           atomic_dec_and_test (dec count_pending to -1)
+
+Fix this race by using atomic_dec_if_positive - that will prevent
+count_pending from going below zero.
+
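+A minimal userspace model of the atomic_dec_if_positive() pattern the
+fix relies on (illustration only, not the driver code; dec_if_positive()
+below is a hypothetical stand-in for the kernel helper):
+
+	#include <stdatomic.h>
+	#include <stdio.h>
+
+	/* Decrement only if the result stays >= 0; return old value - 1,
+	 * mirroring the kernel's atomic_dec_if_positive() semantics.
+	 */
+	static int dec_if_positive(atomic_int *v)
+	{
+		int old = atomic_load(v);
+
+		while (old > 0 &&
+		       !atomic_compare_exchange_weak(v, &old, old - 1))
+			;
+		return old - 1;
+	}
+
+	int main(void)
+	{
+		atomic_int count_pending = 2;
+		int run, val;
+
+		/* Four racing work runs but only two notifications pending:
+		 * the counter never goes below zero.
+		 */
+		for (run = 0; run < 4; run++) {
+			val = dec_if_positive(&count_pending);
+			if (val < 0) {
+				printf("run %d: nothing pending, bail out\n", run);
+				continue;
+			}
+			printf("run %d: notify, %d still pending\n", run, val);
+		}
+		return 0;
+	}
+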
+Fixes: fc423ff00df3a1955441 ("team: add peer notification")
+Fixes: 492b200efdd20b8fcfd ("team: add support for sending multicast rejoins")
+Signed-off-by: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: Jiri Benc <jbenc@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/team/team.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/team/team.c
++++ b/drivers/net/team/team.c
+@@ -629,6 +629,7 @@ static int team_change_mode(struct team
+ static void team_notify_peers_work(struct work_struct *work)
+ {
+ struct team *team;
++ int val;
+
+ team = container_of(work, struct team, notify_peers.dw.work);
+
+@@ -636,9 +637,14 @@ static void team_notify_peers_work(struc
+ schedule_delayed_work(&team->notify_peers.dw, 0);
+ return;
+ }
++ val = atomic_dec_if_positive(&team->notify_peers.count_pending);
++ if (val < 0) {
++ rtnl_unlock();
++ return;
++ }
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, team->dev);
+ rtnl_unlock();
+- if (!atomic_dec_and_test(&team->notify_peers.count_pending))
++ if (val)
+ schedule_delayed_work(&team->notify_peers.dw,
+ msecs_to_jiffies(team->notify_peers.interval));
+ }
+@@ -669,6 +675,7 @@ static void team_notify_peers_fini(struc
+ static void team_mcast_rejoin_work(struct work_struct *work)
+ {
+ struct team *team;
++ int val;
+
+ team = container_of(work, struct team, mcast_rejoin.dw.work);
+
+@@ -676,9 +683,14 @@ static void team_mcast_rejoin_work(struc
+ schedule_delayed_work(&team->mcast_rejoin.dw, 0);
+ return;
+ }
++ val = atomic_dec_if_positive(&team->mcast_rejoin.count_pending);
++ if (val < 0) {
++ rtnl_unlock();
++ return;
++ }
+ call_netdevice_notifiers(NETDEV_RESEND_IGMP, team->dev);
+ rtnl_unlock();
+- if (!atomic_dec_and_test(&team->mcast_rejoin.count_pending))
++ if (val)
+ schedule_delayed_work(&team->mcast_rejoin.dw,
+ msecs_to_jiffies(team->mcast_rejoin.interval));
+ }
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: Prashant Sreedharan <prashant@broadcom.com>
+Date: Sat, 20 Dec 2014 12:16:17 -0800
+Subject: tg3: tg3_disable_ints using uninitialized mailbox value to disable interrupts
+
+From: Prashant Sreedharan <prashant@broadcom.com>
+
+[ Upstream commit 05b0aa579397b734f127af58e401a30784a1e315 ]
+
+During driver load in tg3_init_one, if the driver detects DMA activity before
+initializing the chip, tg3_halt is called. As part of tg3_halt, interrupts are
+disabled using the routine tg3_disable_ints. This routine was using a mailbox
+value that had not been initialized yet (default value 0). As a result, the
+driver was writing 0x00000001 to PCI config space register 0, which is the
+vendor ID / device ID.
+
+This driver bug was exposed by commit a7877b17a667 (PCI: Check only the
+Vendor ID to identify Configuration Request Retry). The issue is only seen
+on older generation chipsets like the 5722, where a config space write to
+offset 0 from the driver is possible; newer generation chips ignore writes
+to offset 0. Without commit a7877b17a667, the bootcode on these older chips
+would reprogram the vendor ID / device ID whenever a GRC reset was issued,
+which is why this bug was masked earlier.
+
+Fixed by initializing the interrupt mailbox registers before calling tg3_halt.
+
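+The corrected ordering in tg3_init_one(), condensed from the hunks below
+(error handling and the per-vector mailbox setup are omitted; this is a
+sketch of the ordering only, not a replacement for the diff):
+
+	/* Compute the mailbox offsets first ... */
+	intmbx = MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW;
+	/* ... per-vector tnapi mailbox assignments ... */
+
+	/* ... so that this early halt path disables interrupts through a
+	 * real mailbox instead of writing to offset 0 (vendor/device ID).
+	 */
+	if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) ||
+	    (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
+		tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
+		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
+	}
+
+	err = tg3_test_dma(tp);
+	if (err)
+		goto err_out_apeunmap;
+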
+Please queue for -stable.
+
+Reported-by: Nils Holland <nholland@tisys.org>
+Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: Prashant Sreedharan <prashant@broadcom.com>
+Signed-off-by: Michael Chan <mchan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c | 34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -17789,23 +17789,6 @@ static int tg3_init_one(struct pci_dev *
+ goto err_out_apeunmap;
+ }
+
+- /*
+- * Reset chip in case UNDI or EFI driver did not shutdown
+- * DMA self test will enable WDMAC and we'll see (spurious)
+- * pending DMA on the PCI bus at that point.
+- */
+- if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) ||
+- (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
+- tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
+- tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
+- }
+-
+- err = tg3_test_dma(tp);
+- if (err) {
+- dev_err(&pdev->dev, "DMA engine test failed, aborting\n");
+- goto err_out_apeunmap;
+- }
+-
+ intmbx = MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW;
+ rcvmbx = MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW;
+ sndmbx = MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW;
+@@ -17850,6 +17833,23 @@ static int tg3_init_one(struct pci_dev *
+ sndmbx += 0xc;
+ }
+
++ /*
++ * Reset chip in case UNDI or EFI driver did not shutdown
++ * DMA self test will enable WDMAC and we'll see (spurious)
++ * pending DMA on the PCI bus at that point.
++ */
++ if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) ||
++ (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
++ tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
++ tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
++ }
++
++ err = tg3_test_dma(tp);
++ if (err) {
++ dev_err(&pdev->dev, "DMA engine test failed, aborting\n");
++ goto err_out_apeunmap;
++ }
++
+ tg3_init_coal(tp);
+
+ pci_set_drvdata(pdev, dev);
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: "Palik, Imre" <imrep@amazon.de>
+Date: Tue, 6 Jan 2015 16:44:44 +0100
+Subject: xen-netback: fixing the propagation of the transmit shaper timeout
+
+From: "Palik, Imre" <imrep@amazon.de>
+
+[ Upstream commit 07ff890daeda31cf23173865edf50bcb03e100c3 ]
+
+Since e9ce7cb6b107 ("xen-netback: Factor queue-specific data into queue struct"),
+the transmit shaper timeout is always set to 0. The value the user sets via
+xenbus is never propagated to the transmit shaper.
+
+This patch fixes the issue.
+
+Cc: Anthony Liguori <aliguori@amazon.com>
+Signed-off-by: Imre Palik <imrep@amazon.de>
+Acked-by: Ian Campbell <ian.campbell@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netback/xenbus.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/xen-netback/xenbus.c
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -736,6 +736,7 @@ static void connect(struct backend_info
+ }
+
+ queue->remaining_credit = credit_bytes;
++ queue->credit_usec = credit_usec;
+
+ err = connect_rings(be, queue);
+ if (err) {
--- /dev/null
+From foo@baz Sat Jan 17 18:12:21 PST 2015
+From: David Vrabel <david.vrabel@citrix.com>
+Date: Thu, 18 Dec 2014 11:13:06 +0000
+Subject: xen-netback: support frontends without feature-rx-notify again
+
+From: David Vrabel <david.vrabel@citrix.com>
+
+[ Upstream commit 26c0e102585d5a4d311f5d6eb7f524d288e7f6b7 ]
+
+Commit bc96f648df1bbc2729abbb84513cf4f64273a1f1 (xen-netback: make
+feature-rx-notify mandatory) incorrectly assumed that there were no
+frontends in use that did not support this feature. But the frontend
+driver in MiniOS does not, and since it is used by (qemu) stubdoms,
+these stopped working.
+
+Netback sort of works as-is in this mode except:
+
+- If there are no Rx requests and the internal Rx queue fills, only
+ the drain timeout will wake the thread. The default drain timeout
+ of 10 s would give unacceptable pauses.
+
+- If an Rx stall was detected and the internal Rx queue is drained,
+ then the Rx thread would never wake.
+
+Handle these two cases (when feature-rx-notify is disabled), as sketched
+in the code below, by:
+
+- Reducing the drain timeout to 30 ms.
+
+- Disabling Rx stall detection.
+
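+A condensed view of the resulting behaviour (simplified from the hunks
+below; not a drop-in replacement for them):
+
+	/* xenbus: a missing or zero feature-rx-notify key now selects
+	 * per-vif fallback timeouts instead of failing the connect.
+	 */
+	if (xenbus_scanf(XBT_NIL, dev->otherend,
+			 "feature-rx-notify", "%d", &val) < 0)
+		val = 0;
+	if (!val) {
+		be->vif->drain_timeout = msecs_to_jiffies(30);
+		be->vif->stall_timeout = 0;	/* 0 == stall detection off */
+	}
+
+	/* Rx kthread: with stall_timeout == 0 the carrier is left up and
+	 * stall detection is skipped entirely.
+	 */
+	if (vif->stall_timeout) {
+		if (xenvif_rx_queue_stalled(queue))
+			xenvif_queue_carrier_off(queue);
+		else if (xenvif_rx_queue_ready(queue))
+			xenvif_queue_carrier_on(queue);
+	}
+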
+Reported-by: John <jw@nuclearfallout.net>
+Tested-by: John <jw@nuclearfallout.net>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Reviewed-by: Wei Liu <wei.liu2@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netback/common.h | 4 +++-
+ drivers/net/xen-netback/interface.c | 4 +++-
+ drivers/net/xen-netback/netback.c | 27 ++++++++++++++-------------
+ drivers/net/xen-netback/xenbus.c | 12 +++++++++---
+ 4 files changed, 29 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -230,6 +230,8 @@ struct xenvif {
+ */
+ bool disabled;
+ unsigned long status;
++ unsigned long drain_timeout;
++ unsigned long stall_timeout;
+
+ /* Queues */
+ struct xenvif_queue *queues;
+@@ -328,7 +330,7 @@ irqreturn_t xenvif_interrupt(int irq, vo
+ extern bool separate_tx_rx_irq;
+
+ extern unsigned int rx_drain_timeout_msecs;
+-extern unsigned int rx_drain_timeout_jiffies;
++extern unsigned int rx_stall_timeout_msecs;
+ extern unsigned int xenvif_max_queues;
+
+ #ifdef CONFIG_DEBUG_FS
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -166,7 +166,7 @@ static int xenvif_start_xmit(struct sk_b
+ goto drop;
+
+ cb = XENVIF_RX_CB(skb);
+- cb->expires = jiffies + rx_drain_timeout_jiffies;
++ cb->expires = jiffies + vif->drain_timeout;
+
+ xenvif_rx_queue_tail(queue, skb);
+ xenvif_kick_thread(queue);
+@@ -414,6 +414,8 @@ struct xenvif *xenvif_alloc(struct devic
+ vif->ip_csum = 1;
+ vif->dev = dev;
+ vif->disabled = false;
++ vif->drain_timeout = msecs_to_jiffies(rx_drain_timeout_msecs);
++ vif->stall_timeout = msecs_to_jiffies(rx_stall_timeout_msecs);
+
+ /* Start out with no queues. */
+ vif->queues = NULL;
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -60,14 +60,12 @@ module_param(separate_tx_rx_irq, bool, 0
+ */
+ unsigned int rx_drain_timeout_msecs = 10000;
+ module_param(rx_drain_timeout_msecs, uint, 0444);
+-unsigned int rx_drain_timeout_jiffies;
+
+ /* The length of time before the frontend is considered unresponsive
+ * because it isn't providing Rx slots.
+ */
+-static unsigned int rx_stall_timeout_msecs = 60000;
++unsigned int rx_stall_timeout_msecs = 60000;
+ module_param(rx_stall_timeout_msecs, uint, 0444);
+-static unsigned int rx_stall_timeout_jiffies;
+
+ unsigned int xenvif_max_queues;
+ module_param_named(max_queues, xenvif_max_queues, uint, 0644);
+@@ -2022,7 +2020,7 @@ static bool xenvif_rx_queue_stalled(stru
+ return !queue->stalled
+ && prod - cons < XEN_NETBK_RX_SLOTS_MAX
+ && time_after(jiffies,
+- queue->last_rx_time + rx_stall_timeout_jiffies);
++ queue->last_rx_time + queue->vif->stall_timeout);
+ }
+
+ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
+@@ -2040,8 +2038,9 @@ static bool xenvif_have_rx_work(struct x
+ {
+ return (!skb_queue_empty(&queue->rx_queue)
+ && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
+- || xenvif_rx_queue_stalled(queue)
+- || xenvif_rx_queue_ready(queue)
++ || (queue->vif->stall_timeout &&
++ (xenvif_rx_queue_stalled(queue)
++ || xenvif_rx_queue_ready(queue)))
+ || kthread_should_stop()
+ || queue->vif->disabled;
+ }
+@@ -2094,6 +2093,9 @@ int xenvif_kthread_guest_rx(void *data)
+ struct xenvif_queue *queue = data;
+ struct xenvif *vif = queue->vif;
+
++ if (!vif->stall_timeout)
++ xenvif_queue_carrier_on(queue);
++
+ for (;;) {
+ xenvif_wait_for_rx_work(queue);
+
+@@ -2120,10 +2122,12 @@ int xenvif_kthread_guest_rx(void *data)
+ * while it's probably not responsive, drop the
+ * carrier so packets are dropped earlier.
+ */
+- if (xenvif_rx_queue_stalled(queue))
+- xenvif_queue_carrier_off(queue);
+- else if (xenvif_rx_queue_ready(queue))
+- xenvif_queue_carrier_on(queue);
++ if (vif->stall_timeout) {
++ if (xenvif_rx_queue_stalled(queue))
++ xenvif_queue_carrier_off(queue);
++ else if (xenvif_rx_queue_ready(queue))
++ xenvif_queue_carrier_on(queue);
++ }
+
+ /* Queued packets may have foreign pages from other
+ * domains. These cannot be queued indefinitely as
+@@ -2194,9 +2198,6 @@ static int __init netback_init(void)
+ if (rc)
+ goto failed_init;
+
+- rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+- rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
+-
+ #ifdef CONFIG_DEBUG_FS
+ xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
+ if (IS_ERR_OR_NULL(xen_netback_dbg_root))
+--- a/drivers/net/xen-netback/xenbus.c
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -886,9 +886,15 @@ static int read_xenbus_vif_flags(struct
+ return -EOPNOTSUPP;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+- "feature-rx-notify", "%d", &val) < 0 || val == 0) {
+- xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory");
+- return -EINVAL;
++ "feature-rx-notify", "%d", &val) < 0)
++ val = 0;
++ if (!val) {
++ /* - Reduce drain timeout to poll more frequently for
++ * Rx requests.
++ * - Disable Rx stall detection.
++ */
++ be->vif->drain_timeout = msecs_to_jiffies(30);
++ be->vif->stall_timeout = 0;
+ }
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",