git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.0-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 14 Mar 2012 23:47:20 +0000 (16:47 -0700)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 14 Mar 2012 23:47:20 +0000 (16:47 -0700)
added patches:
atl1c-dont-use-highprio-tx-queue.patch
bridge-check-return-value-of-ipv6_dev_get_saddr.patch
ipsec-be-careful-of-non-existing-mac-headers.patch
ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch
neighbour-fixed-race-condition-at-tbl-nht.patch
ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch
tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch
tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch
tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch
vmxnet3-fix-transport-header-size.patch

queue-3.0/atl1c-dont-use-highprio-tx-queue.patch [new file with mode: 0644]
queue-3.0/bridge-check-return-value-of-ipv6_dev_get_saddr.patch [new file with mode: 0644]
queue-3.0/ipsec-be-careful-of-non-existing-mac-headers.patch [new file with mode: 0644]
queue-3.0/ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch [new file with mode: 0644]
queue-3.0/neighbour-fixed-race-condition-at-tbl-nht.patch [new file with mode: 0644]
queue-3.0/ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch [new file with mode: 0644]
queue-3.0/series
queue-3.0/tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch [new file with mode: 0644]
queue-3.0/tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch [new file with mode: 0644]
queue-3.0/tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch [new file with mode: 0644]
queue-3.0/vmxnet3-fix-transport-header-size.patch [new file with mode: 0644]

diff --git a/queue-3.0/atl1c-dont-use-highprio-tx-queue.patch b/queue-3.0/atl1c-dont-use-highprio-tx-queue.patch
new file mode 100644 (file)
index 0000000..8d3204b
--- /dev/null
@@ -0,0 +1,49 @@
+From 18a32153146cbbc5549d5bebd5bb8a3386b6bbe1 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Wed, 15 Feb 2012 20:43:11 +0000
+Subject: atl1c: dont use highprio tx queue
+
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+[ Upstream commit 11aad99af6ef629ff3b05d1c9f0936589b204316 ]
+
+This driver attempts to use two TX rings but lacks proper support :
+
+1) IRQ handler only takes care of TX completion on first TX ring
+2) the stop/start logic uses the legacy functions (for non multiqueue
+drivers)
+
+This means all packets with skb mark set to 1 are sent through high
+queue but are never cleaned and queue eventually fills and blocks the
+device, triggering the infamous "NETDEV WATCHDOG" message.
+
+Let's use a single TX ring to fix the problem, this driver is not a real
+multiqueue one yet.
+
+Minimal fix for stable kernels.
+
+Reported-by: Thomas Meyer <thomas@m3y3r.de>
+Tested-by: Thomas Meyer <thomas@m3y3r.de>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: Jay Cliburn <jcliburn@gmail.com>
+Cc: Chris Snook <chris.snook@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/atl1c/atl1c_main.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/drivers/net/atl1c/atl1c_main.c
++++ b/drivers/net/atl1c/atl1c_main.c
+@@ -2223,10 +2223,6 @@ static netdev_tx_t atl1c_xmit_frame(stru
+                       dev_info(&adapter->pdev->dev, "tx locked\n");
+               return NETDEV_TX_LOCKED;
+       }
+-      if (skb->mark == 0x01)
+-              type = atl1c_trans_high;
+-      else
+-              type = atl1c_trans_normal;
+       if (atl1c_tpd_avail(adapter, type) < tpd_req) {
+               /* no enough descriptor, just stop queue */
diff --git a/queue-3.0/bridge-check-return-value-of-ipv6_dev_get_saddr.patch b/queue-3.0/bridge-check-return-value-of-ipv6_dev_get_saddr.patch
new file mode 100644 (file)
index 0000000..eac46b0
--- /dev/null
@@ -0,0 +1,37 @@
+From 070413ec1d7df89c4e063d0ad10fa9b390935614 Mon Sep 17 00:00:00 2001
+From: Ulrich Weber <ulrich.weber@sophos.com>
+Date: Mon, 5 Mar 2012 04:52:44 +0000
+Subject: bridge: check return value of ipv6_dev_get_saddr()
+
+
+From: Ulrich Weber <ulrich.weber@sophos.com>
+
+[ Upstream commit d1d81d4c3dd886d5fa25a2c4fa1e39cb89613712 ]
+
+otherwise source IPv6 address of ICMPV6_MGM_QUERY packet
+might be random junk if IPv6 is disabled on interface or
+link-local address is not yet ready (DAD).
+
+Signed-off-by: Ulrich Weber <ulrich.weber@sophos.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_multicast.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -446,8 +446,11 @@ static struct sk_buff *br_ip6_multicast_
+       ip6h->nexthdr = IPPROTO_HOPOPTS;
+       ip6h->hop_limit = 1;
+       ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
+-      ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
+-                         &ip6h->saddr);
++      if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
++                             &ip6h->saddr)) {
++              kfree_skb(skb);
++              return NULL;
++      }
+       ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
+       hopopt = (u8 *)(ip6h + 1);
diff --git a/queue-3.0/ipsec-be-careful-of-non-existing-mac-headers.patch b/queue-3.0/ipsec-be-careful-of-non-existing-mac-headers.patch
new file mode 100644 (file)
index 0000000..0fbb3f1
--- /dev/null
@@ -0,0 +1,131 @@
+From 58e9fde060f31ba3fd48e1418a31fa66f9bde467 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Thu, 23 Feb 2012 10:55:02 +0000
+Subject: ipsec: be careful of non existing mac headers
+
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+[ Upstream commit 03606895cd98c0a628b17324fd7b5ff15db7e3cd ]
+
+Niccolo Belli reported ipsec crashes in case we handle a frame without
+mac header (atm in his case)
+
+Before copying mac header, better make sure it is present.
+
+Bugzilla reference:  https://bugzilla.kernel.org/show_bug.cgi?id=42809
+
+Reported-by: Niccolò Belli <darkbasic@linuxsystems.it>
+Tested-by: Niccolò Belli <darkbasic@linuxsystems.it>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h       |   10 ++++++++++
+ net/ipv4/xfrm4_mode_beet.c   |    5 +----
+ net/ipv4/xfrm4_mode_tunnel.c |    6 ++----
+ net/ipv6/xfrm6_mode_beet.c   |    6 +-----
+ net/ipv6/xfrm6_mode_tunnel.c |    6 ++----
+ 5 files changed, 16 insertions(+), 17 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1370,6 +1370,16 @@ static inline void skb_set_mac_header(st
+ }
+ #endif /* NET_SKBUFF_DATA_USES_OFFSET */
++static inline void skb_mac_header_rebuild(struct sk_buff *skb)
++{
++      if (skb_mac_header_was_set(skb)) {
++              const unsigned char *old_mac = skb_mac_header(skb);
++
++              skb_set_mac_header(skb, -skb->mac_len);
++              memmove(skb_mac_header(skb), old_mac, skb->mac_len);
++      }
++}
++
+ static inline int skb_checksum_start_offset(const struct sk_buff *skb)
+ {
+       return skb->csum_start - skb_headroom(skb);
+--- a/net/ipv4/xfrm4_mode_beet.c
++++ b/net/ipv4/xfrm4_mode_beet.c
+@@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_
+       skb_push(skb, sizeof(*iph));
+       skb_reset_network_header(skb);
+-
+-      memmove(skb->data - skb->mac_len, skb_mac_header(skb),
+-              skb->mac_len);
+-      skb_set_mac_header(skb, -skb->mac_len);
++      skb_mac_header_rebuild(skb);
+       xfrm4_beet_make_header(skb);
+--- a/net/ipv4/xfrm4_mode_tunnel.c
++++ b/net/ipv4/xfrm4_mode_tunnel.c
+@@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(stru
+ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
+ {
+-      const unsigned char *old_mac;
+       int err = -EINVAL;
+       if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
+@@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struc
+       if (!(x->props.flags & XFRM_STATE_NOECN))
+               ipip_ecn_decapsulate(skb);
+-      old_mac = skb_mac_header(skb);
+-      skb_set_mac_header(skb, -skb->mac_len);
+-      memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+       skb_reset_network_header(skb);
++      skb_mac_header_rebuild(skb);
++
+       err = 0;
+ out:
+--- a/net/ipv6/xfrm6_mode_beet.c
++++ b/net/ipv6/xfrm6_mode_beet.c
+@@ -80,7 +80,6 @@ static int xfrm6_beet_output(struct xfrm
+ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
+ {
+       struct ipv6hdr *ip6h;
+-      const unsigned char *old_mac;
+       int size = sizeof(struct ipv6hdr);
+       int err;
+@@ -90,10 +89,7 @@ static int xfrm6_beet_input(struct xfrm_
+       __skb_push(skb, size);
+       skb_reset_network_header(skb);
+-
+-      old_mac = skb_mac_header(skb);
+-      skb_set_mac_header(skb, -skb->mac_len);
+-      memmove(skb_mac_header(skb), old_mac, skb->mac_len);
++      skb_mac_header_rebuild(skb);
+       xfrm6_beet_make_header(skb);
+--- a/net/ipv6/xfrm6_mode_tunnel.c
++++ b/net/ipv6/xfrm6_mode_tunnel.c
+@@ -63,7 +63,6 @@ static int xfrm6_mode_tunnel_output(stru
+ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
+ {
+       int err = -EINVAL;
+-      const unsigned char *old_mac;
+       if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
+               goto out;
+@@ -80,10 +79,9 @@ static int xfrm6_mode_tunnel_input(struc
+       if (!(x->props.flags & XFRM_STATE_NOECN))
+               ipip6_ecn_decapsulate(skb);
+-      old_mac = skb_mac_header(skb);
+-      skb_set_mac_header(skb, -skb->mac_len);
+-      memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+       skb_reset_network_header(skb);
++      skb_mac_header_rebuild(skb);
++
+       err = 0;
+ out:
diff --git a/queue-3.0/ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch b/queue-3.0/ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch
new file mode 100644 (file)
index 0000000..f174853
--- /dev/null
@@ -0,0 +1,33 @@
+From 8f321ec08beba0cba001367bfbed831c8d316978 Mon Sep 17 00:00:00 2001
+From: Li Wei <lw@cn.fujitsu.com>
+Date: Mon, 5 Mar 2012 14:45:17 +0000
+Subject: IPv6: Fix not join all-router mcast group when forwarding set.
+
+
+From: Li Wei <lw@cn.fujitsu.com>
+
+[ Upstream commit d6ddef9e641d1229d4ec841dc75ae703171c3e92 ]
+
+When forwarding is set and a new net device is registered,
+we need to add this device to the all-router mcast group.
+
+Signed-off-by: Li Wei <lw@cn.fujitsu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -433,6 +433,10 @@ static struct inet6_dev * ipv6_add_dev(s
+       /* Join all-node multicast group */
+       ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
++      /* Join all-router multicast group if forwarding is set */
++      if (ndev->cnf.forwarding && dev && (dev->flags & IFF_MULTICAST))
++              ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
++
+       return ndev;
+ }
diff --git a/queue-3.0/neighbour-fixed-race-condition-at-tbl-nht.patch b/queue-3.0/neighbour-fixed-race-condition-at-tbl-nht.patch
new file mode 100644 (file)
index 0000000..91e7a15
--- /dev/null
@@ -0,0 +1,46 @@
+From c98dd2fe278869db8d534d989c9ee879bd23dbb4 Mon Sep 17 00:00:00 2001
+From: Michel Machado <michel@digirati.com.br>
+Date: Tue, 21 Feb 2012 11:04:13 +0000
+Subject: neighbour: Fixed race condition at tbl->nht
+
+
+From: Michel Machado <michel@digirati.com.br>
+
+[ Upstream commit 84338a6c9dbb6ff3de4749864020f8f25d86fc81 ]
+
+The race condition fixed by this patch happens as follows:
+
+1. While function neigh_periodic_work scans the neighbor hash table
+pointed by field tbl->nht, it unlocks and locks tbl->lock between
+buckets in order to call cond_resched.
+
+2. Assume that function neigh_periodic_work calls cond_resched, that is,
+the lock tbl->lock is available, and function neigh_hash_grow runs.
+
+3. Once function neigh_hash_grow finishes, and RCU calls
+neigh_hash_free_rcu, the original struct neigh_hash_table that function
+neigh_periodic_work was using doesn't exist anymore.
+
+4. Once back at neigh_periodic_work, whenever the old struct
+neigh_hash_table is accessed, things can go badly.
+
+Signed-off-by: Michel Machado <michel@digirati.com.br>
+CC: "David S. Miller" <davem@davemloft.net>
+CC: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/neighbour.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -823,6 +823,8 @@ next_elt:
+               write_unlock_bh(&tbl->lock);
+               cond_resched();
+               write_lock_bh(&tbl->lock);
++              nht = rcu_dereference_protected(tbl->nht,
++                                              lockdep_is_held(&tbl->lock));
+       }
+       /* Cycle through all hash buckets every base_reachable_time/2 ticks.
+        * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
diff --git a/queue-3.0/ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch b/queue-3.0/ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch
new file mode 100644 (file)
index 0000000..f38072b
--- /dev/null
@@ -0,0 +1,88 @@
+From a52a55a6eca3a942c7126e1543ae5552225bc38d Mon Sep 17 00:00:00 2001
+From: Ben McKeegan <ben@netservers.co.uk>
+Date: Fri, 24 Feb 2012 06:33:56 +0000
+Subject: ppp: fix 'ppp_mp_reconstruct bad seq' errors
+
+
+From: Ben McKeegan <ben@netservers.co.uk>
+
+[ Upstream commit 8a49ad6e89feb5015e77ce6efeb2678947117e20 ]
+
+This patch fixes a (mostly cosmetic) bug introduced by the patch
+'ppp: Use SKB queue abstraction interfaces in fragment processing'
+found here: http://www.spinics.net/lists/netdev/msg153312.html
+
+The above patch rewrote and moved the code responsible for cleaning
+up discarded fragments but the new code does not catch every case
+where this is necessary.  This results in some discarded fragments
+remaining in the queue, and triggering a 'bad seq' error on the
+subsequent call to ppp_mp_reconstruct.  Fragments are discarded
+whenever other fragments of the same frame have been lost.
+This can generate a lot of unwanted and misleading log messages.
+
+This patch also adds additional detail to the debug logging to
+make it clearer which fragments were lost and which other fragments
+were discarded as a result of losses. (Run pppd with 'kdebug 1'
+option to enable debug logging.)
+
+Signed-off-by: Ben McKeegan <ben@netservers.co.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp_generic.c |   23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/drivers/net/ppp_generic.c
++++ b/drivers/net/ppp_generic.c
+@@ -2019,14 +2019,22 @@ ppp_mp_reconstruct(struct ppp *ppp)
+                       continue;
+               }
+               if (PPP_MP_CB(p)->sequence != seq) {
++                      u32 oldseq;
+                       /* Fragment `seq' is missing.  If it is after
+                          minseq, it might arrive later, so stop here. */
+                       if (seq_after(seq, minseq))
+                               break;
+                       /* Fragment `seq' is lost, keep going. */
+                       lost = 1;
++                      oldseq = seq;
+                       seq = seq_before(minseq, PPP_MP_CB(p)->sequence)?
+                               minseq + 1: PPP_MP_CB(p)->sequence;
++
++                      if (ppp->debug & 1)
++                              netdev_printk(KERN_DEBUG, ppp->dev,
++                                            "lost frag %u..%u\n",
++                                            oldseq, seq-1);
++
+                       goto again;
+               }
+@@ -2071,6 +2079,10 @@ ppp_mp_reconstruct(struct ppp *ppp)
+                       struct sk_buff *tmp2;
+                       skb_queue_reverse_walk_from_safe(list, p, tmp2) {
++                              if (ppp->debug & 1)
++                                      netdev_printk(KERN_DEBUG, ppp->dev,
++                                                    "discarding frag %u\n",
++                                                    PPP_MP_CB(p)->sequence);
+                               __skb_unlink(p, list);
+                               kfree_skb(p);
+                       }
+@@ -2086,6 +2098,17 @@ ppp_mp_reconstruct(struct ppp *ppp)
+               /* If we have discarded any fragments,
+                  signal a receive error. */
+               if (PPP_MP_CB(head)->sequence != ppp->nextseq) {
++                      skb_queue_walk_safe(list, p, tmp) {
++                              if (p == head)
++                                      break;
++                              if (ppp->debug & 1)
++                                      netdev_printk(KERN_DEBUG, ppp->dev,
++                                                    "discarding frag %u\n",
++                                                    PPP_MP_CB(p)->sequence);
++                              __skb_unlink(p, list);
++                              kfree_skb(p);
++                      }
++
+                       if (ppp->debug & 1)
+                               netdev_printk(KERN_DEBUG, ppp->dev,
+                                             "  missed pkts %u..%u\n",
index 500c5b60f851d8449d8d815ff56c27152755c40f..9368e627d7d5451b0229a87c6fd52ccdd958b613 100644 (file)
@@ -10,3 +10,13 @@ rt2x00-fix-random-stalls.patch
 vfs-fix-return-value-from-do_last.patch
 vfs-fix-double-put-after-complete_walk.patch
 acer-wmi-no-wifi-rfkill-on-lenovo-machines.patch
+neighbour-fixed-race-condition-at-tbl-nht.patch
+ipsec-be-careful-of-non-existing-mac-headers.patch
+ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch
+tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch
+vmxnet3-fix-transport-header-size.patch
+tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch
+bridge-check-return-value-of-ipv6_dev_get_saddr.patch
+tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch
+ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch
+atl1c-dont-use-highprio-tx-queue.patch
diff --git a/queue-3.0/tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch b/queue-3.0/tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch
new file mode 100644 (file)
index 0000000..820bcc4
--- /dev/null
@@ -0,0 +1,49 @@
+From d590045e3989119c2f7ccbdd81edb940fb13b80f Mon Sep 17 00:00:00 2001
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 2 Mar 2012 21:36:51 +0000
+Subject: tcp: don't fragment SACKed skbs in tcp_mark_head_lost()
+
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit c0638c247f559e1a16ee79e54df14bca2cb679ea ]
+
+In tcp_mark_head_lost() we should not attempt to fragment a SACKed skb
+to mark the first portion as lost. This is for two primary reasons:
+
+(1) tcp_shifted_skb() coalesces adjacent regions of SACKed skbs. When
+doing this, it preserves the sum of their packet counts in order to
+reflect the real-world dynamics on the wire. But given that skbs can
+have remainders that do not align to MSS boundaries, this packet count
+preservation means that for SACKed skbs there is not necessarily a
+direct linear relationship between tcp_skb_pcount(skb) and
+skb->len. Thus tcp_mark_head_lost()'s previous attempts to fragment
+off and mark as lost a prefix of length (packets - oldcnt)*mss from
+SACKed skbs were leading to occasional failures of the WARN_ON(len >
+skb->len) in tcp_fragment() (which used to be a BUG_ON(); see the
+recent "crash in tcp_fragment" thread on netdev).
+
+(2) there is no real point in fragmenting off part of a SACKed skb and
+calling tcp_skb_mark_lost() on it, since tcp_skb_mark_lost() is a NOP
+for SACKed skbs.
+
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Nandita Dukkipati <nanditad@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2549,6 +2549,7 @@ static void tcp_mark_head_lost(struct so
+               if (cnt > packets) {
+                       if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
++                          (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
+                           (oldcnt >= packets))
+                               break;
diff --git a/queue-3.0/tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch b/queue-3.0/tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch
new file mode 100644 (file)
index 0000000..1430231
--- /dev/null
@@ -0,0 +1,70 @@
+From bd31d3812b91196b37c5fecb280b9b1d8338367a Mon Sep 17 00:00:00 2001
+From: Neal Cardwell <ncardwell@google.com>
+Date: Sun, 26 Feb 2012 10:06:19 +0000
+Subject: tcp: fix false reordering signal in tcp_shifted_skb
+
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit 4c90d3b30334833450ccbb02f452d4972a3c3c3f ]
+
+When tcp_shifted_skb() shifts bytes from the skb that is currently
+pointed to by 'highest_sack' then the increment of
+TCP_SKB_CB(skb)->seq implicitly advances tcp_highest_sack_seq(). This
+implicit advancement, combined with the recent fix to pass the correct
+SACKed range into tcp_sacktag_one(), caused tcp_sacktag_one() to think
+that the newly SACKed range was before the tcp_highest_sack_seq(),
+leading to a call to tcp_update_reordering() with a degree of
+reordering matching the size of the newly SACKed range (typically just
+1 packet, which is a NOP, but potentially larger).
+
+This commit fixes this by simply calling tcp_sacktag_one() before the
+TCP_SKB_CB(skb)->seq advancement that can advance our notion of the
+highest SACKed sequence.
+
+Correspondingly, we can simplify the code a little now that
+tcp_shifted_skb() should update the lost_cnt_hint in all cases where
+skb == tp->lost_skb_hint.
+
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |   18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1385,8 +1385,16 @@ static int tcp_shifted_skb(struct sock *
+       BUG_ON(!pcount);
+-      /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */
+-      if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint))
++      /* Adjust counters and hints for the newly sacked sequence
++       * range but discard the return value since prev is already
++       * marked. We must tag the range first because the seq
++       * advancement below implicitly advances
++       * tcp_highest_sack_seq() when skb is highest_sack.
++       */
++      tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
++                      start_seq, end_seq, dup_sack, pcount);
++
++      if (skb == tp->lost_skb_hint)
+               tp->lost_cnt_hint += pcount;
+       TCP_SKB_CB(prev)->end_seq += shifted;
+@@ -1412,12 +1420,6 @@ static int tcp_shifted_skb(struct sock *
+               skb_shinfo(skb)->gso_type = 0;
+       }
+-      /* Adjust counters and hints for the newly sacked sequence range but
+-       * discard the return value since prev is already marked.
+-       */
+-      tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+-                      start_seq, end_seq, dup_sack, pcount);
+-
+       /* Difference in this won't matter, both ACKed by the same cumul. ACK */
+       TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
diff --git a/queue-3.0/tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch b/queue-3.0/tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch
new file mode 100644 (file)
index 0000000..37909e5
--- /dev/null
@@ -0,0 +1,86 @@
+From 1a637998ce3026247103ee263697f32a507d39c9 Mon Sep 17 00:00:00 2001
+From: Neal Cardwell <ncardwell@google.com>
+Date: Mon, 5 Mar 2012 19:35:04 +0000
+Subject: tcp: fix tcp_shift_skb_data() to not shift SACKed data below snd_una
+
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit 4648dc97af9d496218a05353b0e442b3dfa6aaab ]
+
+This commit fixes tcp_shift_skb_data() so that it does not shift
+SACKed data below snd_una.
+
+This fixes an issue whose symptoms exactly match reports showing
+tp->sacked_out going negative since 3.3.0-rc4 (see "WARNING: at
+net/ipv4/tcp_input.c:3418" thread on netdev).
+
+Since 2008 (832d11c5cd076abc0aa1eaf7be96c81d1a59ce41)
+tcp_shift_skb_data() had been shifting SACKed ranges that were below
+snd_una. It checked that the *end* of the skb it was about to shift
+from was above snd_una, but did not check that the end of the actual
+shifted range was above snd_una; this commit adds that check.
+
+Shifting SACKed ranges below snd_una is problematic because for such
+ranges tcp_sacktag_one() short-circuits: it does not declare anything
+as SACKed and does not increase sacked_out.
+
+Before the fixes in commits cc9a672ee522d4805495b98680f4a3db5d0a0af9
+and daef52bab1fd26e24e8e9578f8fb33ba1d0cb412, shifting SACKed ranges
+below snd_una happened to work because tcp_shifted_skb() was always
+(incorrectly) passing in to tcp_sacktag_one() an skb whose end_seq
+tcp_shift_skb_data() had already guaranteed was beyond snd_una. Hence
+tcp_sacktag_one() never short-circuited and always increased
+tp->sacked_out in this case.
+
+After those two fixes, my testing has verified that shifting SACKed
+ranges below snd_una could cause tp->sacked_out to go negative with
+the following sequence of events:
+
+(1) tcp_shift_skb_data() sees an skb whose end_seq is beyond snd_una,
+    then shifts a prefix of that skb that is below snd_una
+
+(2) tcp_shifted_skb() increments the packet count of the
+    already-SACKed prev sk_buff
+
+(3) tcp_sacktag_one() sees the end of the new SACKed range is below
+    snd_una, so it short-circuits and doesn't increase tp->sacked_out
+
+(4) tcp_clean_rtx_queue() sees the SACKed skb has been ACKed,
+    decrements tp->sacked_out by this "inflated" pcount that was
+    missing a matching increase in tp->sacked_out, and hence
+    tp->sacked_out underflows to a u32 like 0xFFFFFFFF, which cast
+    to s32 is negative.
+
+(5) this leads to the warnings seen in the recent "WARNING: at
+    net/ipv4/tcp_input.c:3418" thread on the netdev list; e.g.:
+    tcp_input.c:3418  WARN_ON((int)tp->sacked_out < 0);
+
+More generally, I think this bug can be tickled in some cases where
+two or more ACKs from the receiver are lost and then a DSACK arrives
+that is immediately above an existing SACKed skb in the write queue.
+
+This fix changes tcp_shift_skb_data() to abort this sequence at step
+(1) in the scenario above by noticing that the bytes are below snd_una
+and not shifting them.
+
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1567,6 +1567,10 @@ static struct sk_buff *tcp_shift_skb_dat
+               }
+       }
++      /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
++      if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
++              goto fallback;
++
+       if (!skb_shift(prev, skb, len))
+               goto fallback;
+       if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
diff --git a/queue-3.0/vmxnet3-fix-transport-header-size.patch b/queue-3.0/vmxnet3-fix-transport-header-size.patch
new file mode 100644 (file)
index 0000000..9e802c2
--- /dev/null
@@ -0,0 +1,54 @@
+From f147c0cb6c951b7ebbcc7ffe402e08d55ef30a2d Mon Sep 17 00:00:00 2001
+From: Shreyas Bhatewara <sbhatewara@vmware.com>
+Date: Tue, 28 Feb 2012 22:17:38 +0000
+Subject: vmxnet3: Fix transport header size
+
+
+From: Shreyas Bhatewara <sbhatewara@vmware.com>
+
+[ Upstream commit efead8710aad9e384730ecf25eae0287878840d7 ]
+
+Fix transport header size
+
+Fix the transport header size for UDP packets.
+
+Signed-off-by: Shreyas N Bhatewara <sbhatewara@vmware.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c |    7 +------
+ drivers/net/vmxnet3/vmxnet3_int.h |    4 ++--
+ 2 files changed, 3 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -830,13 +830,8 @@ vmxnet3_parse_and_copy_hdr(struct sk_buf
+                                       ctx->l4_hdr_size = ((struct tcphdr *)
+                                          skb_transport_header(skb))->doff * 4;
+                               else if (iph->protocol == IPPROTO_UDP)
+-                                      /*
+-                                       * Use tcp header size so that bytes to
+-                                       * be copied are more than required by
+-                                       * the device.
+-                                       */
+                                       ctx->l4_hdr_size =
+-                                                      sizeof(struct tcphdr);
++                                                      sizeof(struct udphdr);
+                               else
+                                       ctx->l4_hdr_size = 0;
+                       } else {
+--- a/drivers/net/vmxnet3/vmxnet3_int.h
++++ b/drivers/net/vmxnet3/vmxnet3_int.h
+@@ -69,10 +69,10 @@
+ /*
+  * Version numbers
+  */
+-#define VMXNET3_DRIVER_VERSION_STRING   "1.1.18.0-k"
++#define VMXNET3_DRIVER_VERSION_STRING   "1.1.29.0-k"
+ /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
+-#define VMXNET3_DRIVER_VERSION_NUM      0x01011200
++#define VMXNET3_DRIVER_VERSION_NUM      0x01011D00
+ #if defined(CONFIG_PCI_MSI)
+       /* RSS only makes sense if MSI-X is supported. */