From 3ca56c552d398f6f0711cd0a40b96f4421b7441b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 14 Mar 2012 16:47:20 -0700 Subject: [PATCH] 3.0-stable patches added patches: atl1c-dont-use-highprio-tx-queue.patch bridge-check-return-value-of-ipv6_dev_get_saddr.patch ipsec-be-careful-of-non-existing-mac-headers.patch ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch neighbour-fixed-race-condition-at-tbl-nht.patch ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch vmxnet3-fix-transport-header-size.patch --- .../atl1c-dont-use-highprio-tx-queue.patch | 49 +++++++ ...k-return-value-of-ipv6_dev_get_saddr.patch | 37 +++++ ...-careful-of-non-existing-mac-headers.patch | 131 ++++++++++++++++++ ...uter-mcast-group-when-forwarding-set.patch | 33 +++++ ...bour-fixed-race-condition-at-tbl-nht.patch | 46 ++++++ ...ix-ppp_mp_reconstruct-bad-seq-errors.patch | 88 ++++++++++++ queue-3.0/series | 10 ++ ...nt-sacked-skbs-in-tcp_mark_head_lost.patch | 49 +++++++ ...reordering-signal-in-tcp_shifted_skb.patch | 70 ++++++++++ ...-not-shift-sacked-data-below-snd_una.patch | 86 ++++++++++++ .../vmxnet3-fix-transport-header-size.patch | 54 ++++++++ 11 files changed, 653 insertions(+) create mode 100644 queue-3.0/atl1c-dont-use-highprio-tx-queue.patch create mode 100644 queue-3.0/bridge-check-return-value-of-ipv6_dev_get_saddr.patch create mode 100644 queue-3.0/ipsec-be-careful-of-non-existing-mac-headers.patch create mode 100644 queue-3.0/ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch create mode 100644 queue-3.0/neighbour-fixed-race-condition-at-tbl-nht.patch create mode 100644 queue-3.0/ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch create mode 100644 queue-3.0/tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch create mode 100644 
queue-3.0/tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch create mode 100644 queue-3.0/tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch create mode 100644 queue-3.0/vmxnet3-fix-transport-header-size.patch diff --git a/queue-3.0/atl1c-dont-use-highprio-tx-queue.patch b/queue-3.0/atl1c-dont-use-highprio-tx-queue.patch new file mode 100644 index 00000000000..8d3204bad23 --- /dev/null +++ b/queue-3.0/atl1c-dont-use-highprio-tx-queue.patch @@ -0,0 +1,49 @@ +From 18a32153146cbbc5549d5bebd5bb8a3386b6bbe1 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 15 Feb 2012 20:43:11 +0000 +Subject: atl1c: dont use highprio tx queue + + +From: Eric Dumazet + +[ Upstream commit 11aad99af6ef629ff3b05d1c9f0936589b204316 ] + +This driver attempts to use two TX rings but lacks proper support : + +1) IRQ handler only takes care of TX completion on first TX ring +2) the stop/start logic uses the legacy functions (for non multiqueue +drivers) + +This means all packets with skb mark set to 1 are sent through high +queue but are never cleaned and queue eventually fills and blocks the +device, triggering the infamous "NETDEV WATCHDOG" message. + +Let's use a single TX ring to fix the problem, this driver is not a real +multiqueue one yet. + +Minimal fix for stable kernels. + +Reported-by: Thomas Meyer +Tested-by: Thomas Meyer +Signed-off-by: Eric Dumazet +Cc: Jay Cliburn +Cc: Chris Snook +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/atl1c/atl1c_main.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/drivers/net/atl1c/atl1c_main.c ++++ b/drivers/net/atl1c/atl1c_main.c +@@ -2223,10 +2223,6 @@ static netdev_tx_t atl1c_xmit_frame(stru + dev_info(&adapter->pdev->dev, "tx locked\n"); + return NETDEV_TX_LOCKED; + } +- if (skb->mark == 0x01) +- type = atl1c_trans_high; +- else +- type = atl1c_trans_normal; + + if (atl1c_tpd_avail(adapter, type) < tpd_req) { + /* no enough descriptor, just stop queue */ diff --git a/queue-3.0/bridge-check-return-value-of-ipv6_dev_get_saddr.patch b/queue-3.0/bridge-check-return-value-of-ipv6_dev_get_saddr.patch new file mode 100644 index 00000000000..eac46b07fc2 --- /dev/null +++ b/queue-3.0/bridge-check-return-value-of-ipv6_dev_get_saddr.patch @@ -0,0 +1,37 @@ +From 070413ec1d7df89c4e063d0ad10fa9b390935614 Mon Sep 17 00:00:00 2001 +From: Ulrich Weber +Date: Mon, 5 Mar 2012 04:52:44 +0000 +Subject: bridge: check return value of ipv6_dev_get_saddr() + + +From: Ulrich Weber + +[ Upstream commit d1d81d4c3dd886d5fa25a2c4fa1e39cb89613712 ] + +otherwise source IPv6 address of ICMPV6_MGM_QUERY packet +might be random junk if IPv6 is disabled on interface or +link-local address is not yet ready (DAD). + +Signed-off-by: Ulrich Weber +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_multicast.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -446,8 +446,11 @@ static struct sk_buff *br_ip6_multicast_ + ip6h->nexthdr = IPPROTO_HOPOPTS; + ip6h->hop_limit = 1; + ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); +- ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, +- &ip6h->saddr); ++ if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, ++ &ip6h->saddr)) { ++ kfree_skb(skb); ++ return NULL; ++ } + ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); + + hopopt = (u8 *)(ip6h + 1); diff --git a/queue-3.0/ipsec-be-careful-of-non-existing-mac-headers.patch b/queue-3.0/ipsec-be-careful-of-non-existing-mac-headers.patch new file mode 100644 index 00000000000..0fbb3f19ca1 --- /dev/null +++ b/queue-3.0/ipsec-be-careful-of-non-existing-mac-headers.patch @@ -0,0 +1,131 @@ +From 58e9fde060f31ba3fd48e1418a31fa66f9bde467 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Thu, 23 Feb 2012 10:55:02 +0000 +Subject: ipsec: be careful of non existing mac headers + + +From: Eric Dumazet + +[ Upstream commit 03606895cd98c0a628b17324fd7b5ff15db7e3cd ] + +Niccolo Belli reported ipsec crashes in case we handle a frame without +mac header (atm in his case) + +Before copying mac header, better make sure it is present. + +Bugzilla reference: https://bugzilla.kernel.org/show_bug.cgi?id=42809 + +Reported-by: Niccolò Belli +Tested-by: Niccolò Belli +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 10 ++++++++++ + net/ipv4/xfrm4_mode_beet.c | 5 +---- + net/ipv4/xfrm4_mode_tunnel.c | 6 ++---- + net/ipv6/xfrm6_mode_beet.c | 6 +----- + net/ipv6/xfrm6_mode_tunnel.c | 6 ++---- + 5 files changed, 16 insertions(+), 17 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -1370,6 +1370,16 @@ static inline void skb_set_mac_header(st + } + #endif /* NET_SKBUFF_DATA_USES_OFFSET */ + ++static inline void skb_mac_header_rebuild(struct sk_buff *skb) ++{ ++ if (skb_mac_header_was_set(skb)) { ++ const unsigned char *old_mac = skb_mac_header(skb); ++ ++ skb_set_mac_header(skb, -skb->mac_len); ++ memmove(skb_mac_header(skb), old_mac, skb->mac_len); ++ } ++} ++ + static inline int skb_checksum_start_offset(const struct sk_buff *skb) + { + return skb->csum_start - skb_headroom(skb); +--- a/net/ipv4/xfrm4_mode_beet.c ++++ b/net/ipv4/xfrm4_mode_beet.c +@@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_ + + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); +- +- memmove(skb->data - skb->mac_len, skb_mac_header(skb), +- skb->mac_len); +- skb_set_mac_header(skb, -skb->mac_len); ++ skb_mac_header_rebuild(skb); + + xfrm4_beet_make_header(skb); + +--- a/net/ipv4/xfrm4_mode_tunnel.c ++++ b/net/ipv4/xfrm4_mode_tunnel.c +@@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(stru + + static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) + { +- const unsigned char *old_mac; + int err = -EINVAL; + + if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) +@@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struc + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip_ecn_decapsulate(skb); + +- old_mac = skb_mac_header(skb); +- skb_set_mac_header(skb, -skb->mac_len); +- memmove(skb_mac_header(skb), old_mac, skb->mac_len); + skb_reset_network_header(skb); ++ skb_mac_header_rebuild(skb); ++ + err = 0; + + out: +--- a/net/ipv6/xfrm6_mode_beet.c ++++ 
b/net/ipv6/xfrm6_mode_beet.c +@@ -80,7 +80,6 @@ static int xfrm6_beet_output(struct xfrm + static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) + { + struct ipv6hdr *ip6h; +- const unsigned char *old_mac; + int size = sizeof(struct ipv6hdr); + int err; + +@@ -90,10 +89,7 @@ static int xfrm6_beet_input(struct xfrm_ + + __skb_push(skb, size); + skb_reset_network_header(skb); +- +- old_mac = skb_mac_header(skb); +- skb_set_mac_header(skb, -skb->mac_len); +- memmove(skb_mac_header(skb), old_mac, skb->mac_len); ++ skb_mac_header_rebuild(skb); + + xfrm6_beet_make_header(skb); + +--- a/net/ipv6/xfrm6_mode_tunnel.c ++++ b/net/ipv6/xfrm6_mode_tunnel.c +@@ -63,7 +63,6 @@ static int xfrm6_mode_tunnel_output(stru + static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) + { + int err = -EINVAL; +- const unsigned char *old_mac; + + if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) + goto out; +@@ -80,10 +79,9 @@ static int xfrm6_mode_tunnel_input(struc + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(skb); + +- old_mac = skb_mac_header(skb); +- skb_set_mac_header(skb, -skb->mac_len); +- memmove(skb_mac_header(skb), old_mac, skb->mac_len); + skb_reset_network_header(skb); ++ skb_mac_header_rebuild(skb); ++ + err = 0; + + out: diff --git a/queue-3.0/ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch b/queue-3.0/ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch new file mode 100644 index 00000000000..f174853a45c --- /dev/null +++ b/queue-3.0/ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch @@ -0,0 +1,33 @@ +From 8f321ec08beba0cba001367bfbed831c8d316978 Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Mon, 5 Mar 2012 14:45:17 +0000 +Subject: IPv6: Fix not join all-router mcast group when forwarding set. 
+ + +From: Li Wei + +[ Upstream commit d6ddef9e641d1229d4ec841dc75ae703171c3e92 ] + +When forwarding was set and a new net device is registered, +we need to add this device to the all-router mcast group. + +Signed-off-by: Li Wei +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -433,6 +433,10 @@ static struct inet6_dev * ipv6_add_dev(s + /* Join all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); + ++ /* Join all-router multicast group if forwarding is set */ ++ if (ndev->cnf.forwarding && dev && (dev->flags & IFF_MULTICAST)) ++ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); ++ + return ndev; + } + diff --git a/queue-3.0/neighbour-fixed-race-condition-at-tbl-nht.patch b/queue-3.0/neighbour-fixed-race-condition-at-tbl-nht.patch new file mode 100644 index 00000000000..91e7a154f08 --- /dev/null +++ b/queue-3.0/neighbour-fixed-race-condition-at-tbl-nht.patch @@ -0,0 +1,46 @@ +From c98dd2fe278869db8d534d989c9ee879bd23dbb4 Mon Sep 17 00:00:00 2001 +From: Michel Machado +Date: Tue, 21 Feb 2012 11:04:13 +0000 +Subject: neighbour: Fixed race condition at tbl->nht + + +From: Michel Machado + +[ Upstream commit 84338a6c9dbb6ff3de4749864020f8f25d86fc81 ] + +When the fixed race condition happens: + +1. While function neigh_periodic_work scans the neighbor hash table +pointed by field tbl->nht, it unlocks and locks tbl->lock between +buckets in order to call cond_resched. + +2. Assume that function neigh_periodic_work calls cond_resched, that is, +the lock tbl->lock is available, and function neigh_hash_grow runs. + +3. Once function neigh_hash_grow finishes, and RCU calls +neigh_hash_free_rcu, the original struct neigh_hash_table that function +neigh_periodic_work was using doesn't exist anymore. + +4. 
Once back at neigh_periodic_work, whenever the old struct +neigh_hash_table is accessed, things can go badly. + +Signed-off-by: Michel Machado +CC: "David S. Miller" +CC: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/neighbour.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/core/neighbour.c ++++ b/net/core/neighbour.c +@@ -823,6 +823,8 @@ next_elt: + write_unlock_bh(&tbl->lock); + cond_resched(); + write_lock_bh(&tbl->lock); ++ nht = rcu_dereference_protected(tbl->nht, ++ lockdep_is_held(&tbl->lock)); + } + /* Cycle through all hash buckets every base_reachable_time/2 ticks. + * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 diff --git a/queue-3.0/ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch b/queue-3.0/ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch new file mode 100644 index 00000000000..f38072b1217 --- /dev/null +++ b/queue-3.0/ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch @@ -0,0 +1,88 @@ +From a52a55a6eca3a942c7126e1543ae5552225bc38d Mon Sep 17 00:00:00 2001 +From: Ben McKeegan +Date: Fri, 24 Feb 2012 06:33:56 +0000 +Subject: ppp: fix 'ppp_mp_reconstruct bad seq' errors + + +From: Ben McKeegan + +[ Upstream commit 8a49ad6e89feb5015e77ce6efeb2678947117e20 ] + +This patch fixes a (mostly cosmetic) bug introduced by the patch +'ppp: Use SKB queue abstraction interfaces in fragment processing' +found here: http://www.spinics.net/lists/netdev/msg153312.html + +The above patch rewrote and moved the code responsible for cleaning +up discarded fragments but the new code does not catch every case +where this is necessary. This results in some discarded fragments +remaining in the queue, and triggering a 'bad seq' error on the +subsequent call to ppp_mp_reconstruct. Fragments are discarded +whenever other fragments of the same frame have been lost. +This can generate a lot of unwanted and misleading log messages. 
+ +This patch also adds additional detail to the debug logging to +make it clearer which fragments were lost and which other fragments +were discarded as a result of losses. (Run pppd with 'kdebug 1' +option to enable debug logging.) + +Signed-off-by: Ben McKeegan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp_generic.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +--- a/drivers/net/ppp_generic.c ++++ b/drivers/net/ppp_generic.c +@@ -2019,14 +2019,22 @@ ppp_mp_reconstruct(struct ppp *ppp) + continue; + } + if (PPP_MP_CB(p)->sequence != seq) { ++ u32 oldseq; + /* Fragment `seq' is missing. If it is after + minseq, it might arrive later, so stop here. */ + if (seq_after(seq, minseq)) + break; + /* Fragment `seq' is lost, keep going. */ + lost = 1; ++ oldseq = seq; + seq = seq_before(minseq, PPP_MP_CB(p)->sequence)? + minseq + 1: PPP_MP_CB(p)->sequence; ++ ++ if (ppp->debug & 1) ++ netdev_printk(KERN_DEBUG, ppp->dev, ++ "lost frag %u..%u\n", ++ oldseq, seq-1); ++ + goto again; + } + +@@ -2071,6 +2079,10 @@ ppp_mp_reconstruct(struct ppp *ppp) + struct sk_buff *tmp2; + + skb_queue_reverse_walk_from_safe(list, p, tmp2) { ++ if (ppp->debug & 1) ++ netdev_printk(KERN_DEBUG, ppp->dev, ++ "discarding frag %u\n", ++ PPP_MP_CB(p)->sequence); + __skb_unlink(p, list); + kfree_skb(p); + } +@@ -2086,6 +2098,17 @@ ppp_mp_reconstruct(struct ppp *ppp) + /* If we have discarded any fragments, + signal a receive error. 
*/ + if (PPP_MP_CB(head)->sequence != ppp->nextseq) { ++ skb_queue_walk_safe(list, p, tmp) { ++ if (p == head) ++ break; ++ if (ppp->debug & 1) ++ netdev_printk(KERN_DEBUG, ppp->dev, ++ "discarding frag %u\n", ++ PPP_MP_CB(p)->sequence); ++ __skb_unlink(p, list); ++ kfree_skb(p); ++ } ++ + if (ppp->debug & 1) + netdev_printk(KERN_DEBUG, ppp->dev, + " missed pkts %u..%u\n", diff --git a/queue-3.0/series b/queue-3.0/series index 500c5b60f85..9368e627d7d 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -10,3 +10,13 @@ rt2x00-fix-random-stalls.patch vfs-fix-return-value-from-do_last.patch vfs-fix-double-put-after-complete_walk.patch acer-wmi-no-wifi-rfkill-on-lenovo-machines.patch +neighbour-fixed-race-condition-at-tbl-nht.patch +ipsec-be-careful-of-non-existing-mac-headers.patch +ppp-fix-ppp_mp_reconstruct-bad-seq-errors.patch +tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch +vmxnet3-fix-transport-header-size.patch +tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch +bridge-check-return-value-of-ipv6_dev_get_saddr.patch +tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch +ipv6-fix-not-join-all-router-mcast-group-when-forwarding-set.patch +atl1c-dont-use-highprio-tx-queue.patch diff --git a/queue-3.0/tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch b/queue-3.0/tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch new file mode 100644 index 00000000000..820bcc47e64 --- /dev/null +++ b/queue-3.0/tcp-don-t-fragment-sacked-skbs-in-tcp_mark_head_lost.patch @@ -0,0 +1,49 @@ +From d590045e3989119c2f7ccbdd81edb940fb13b80f Mon Sep 17 00:00:00 2001 +From: Neal Cardwell +Date: Fri, 2 Mar 2012 21:36:51 +0000 +Subject: tcp: don't fragment SACKed skbs in tcp_mark_head_lost() + + +From: Neal Cardwell + +[ Upstream commit c0638c247f559e1a16ee79e54df14bca2cb679ea ] + +In tcp_mark_head_lost() we should not attempt to fragment a SACKed skb +to mark the first portion as lost. 
This is for two primary reasons: + +(1) tcp_shifted_skb() coalesces adjacent regions of SACKed skbs. When +doing this, it preserves the sum of their packet counts in order to +reflect the real-world dynamics on the wire. But given that skbs can +have remainders that do not align to MSS boundaries, this packet count +preservation means that for SACKed skbs there is not necessarily a +direct linear relationship between tcp_skb_pcount(skb) and +skb->len. Thus tcp_mark_head_lost()'s previous attempts to fragment +off and mark as lost a prefix of length (packets - oldcnt)*mss from +SACKed skbs were leading to occasional failures of the WARN_ON(len > +skb->len) in tcp_fragment() (which used to be a BUG_ON(); see the +recent "crash in tcp_fragment" thread on netdev). + +(2) there is no real point in fragmenting off part of a SACKed skb and +calling tcp_skb_mark_lost() on it, since tcp_skb_mark_lost() is a NOP +for SACKed skbs. + +Signed-off-by: Neal Cardwell +Acked-by: Ilpo Järvinen +Acked-by: Yuchung Cheng +Acked-by: Nandita Dukkipati +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2549,6 +2549,7 @@ static void tcp_mark_head_lost(struct so + + if (cnt > packets) { + if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || ++ (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || + (oldcnt >= packets)) + break; + diff --git a/queue-3.0/tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch b/queue-3.0/tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch new file mode 100644 index 00000000000..1430231fe7f --- /dev/null +++ b/queue-3.0/tcp-fix-false-reordering-signal-in-tcp_shifted_skb.patch @@ -0,0 +1,70 @@ +From bd31d3812b91196b37c5fecb280b9b1d8338367a Mon Sep 17 00:00:00 2001 +From: Neal Cardwell +Date: Sun, 26 Feb 2012 10:06:19 +0000 +Subject: tcp: fix false reordering signal in tcp_shifted_skb + + +From: Neal Cardwell + +[ Upstream commit 4c90d3b30334833450ccbb02f452d4972a3c3c3f ] + +When tcp_shifted_skb() shifts bytes from the skb that is currently +pointed to by 'highest_sack' then the increment of +TCP_SKB_CB(skb)->seq implicitly advances tcp_highest_sack_seq(). This +implicit advancement, combined with the recent fix to pass the correct +SACKed range into tcp_sacktag_one(), caused tcp_sacktag_one() to think +that the newly SACKed range was before the tcp_highest_sack_seq(), +leading to a call to tcp_update_reordering() with a degree of +reordering matching the size of the newly SACKed range (typically just +1 packet, which is a NOP, but potentially larger). + +This commit fixes this by simply calling tcp_sacktag_one() before the +TCP_SKB_CB(skb)->seq advancement that can advance our notion of the +highest SACKed sequence. + +Correspondingly, we can simplify the code a little now that +tcp_shifted_skb() should update the lost_cnt_hint in all cases where +skb == tp->lost_skb_hint. + +Signed-off-by: Neal Cardwell +Acked-by: Yuchung Cheng +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1385,8 +1385,16 @@ static int tcp_shifted_skb(struct sock * + + BUG_ON(!pcount); + +- /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */ +- if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint)) ++ /* Adjust counters and hints for the newly sacked sequence ++ * range but discard the return value since prev is already ++ * marked. We must tag the range first because the seq ++ * advancement below implicitly advances ++ * tcp_highest_sack_seq() when skb is highest_sack. ++ */ ++ tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, ++ start_seq, end_seq, dup_sack, pcount); ++ ++ if (skb == tp->lost_skb_hint) + tp->lost_cnt_hint += pcount; + + TCP_SKB_CB(prev)->end_seq += shifted; +@@ -1412,12 +1420,6 @@ static int tcp_shifted_skb(struct sock * + skb_shinfo(skb)->gso_type = 0; + } + +- /* Adjust counters and hints for the newly sacked sequence range but +- * discard the return value since prev is already marked. +- */ +- tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, +- start_seq, end_seq, dup_sack, pcount); +- + /* Difference in this won't matter, both ACKed by the same cumul. 
ACK */ + TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); + diff --git a/queue-3.0/tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch b/queue-3.0/tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch new file mode 100644 index 00000000000..37909e53b8b --- /dev/null +++ b/queue-3.0/tcp-fix-tcp_shift_skb_data-to-not-shift-sacked-data-below-snd_una.patch @@ -0,0 +1,86 @@ +From 1a637998ce3026247103ee263697f32a507d39c9 Mon Sep 17 00:00:00 2001 +From: Neal Cardwell +Date: Mon, 5 Mar 2012 19:35:04 +0000 +Subject: tcp: fix tcp_shift_skb_data() to not shift SACKed data below snd_una + + +From: Neal Cardwell + +[ Upstream commit 4648dc97af9d496218a05353b0e442b3dfa6aaab ] + +This commit fixes tcp_shift_skb_data() so that it does not shift +SACKed data below snd_una. + +This fixes an issue whose symptoms exactly match reports showing +tp->sacked_out going negative since 3.3.0-rc4 (see "WARNING: at +net/ipv4/tcp_input.c:3418" thread on netdev). + +Since 2008 (832d11c5cd076abc0aa1eaf7be96c81d1a59ce41) +tcp_shift_skb_data() had been shifting SACKed ranges that were below +snd_una. It checked that the *end* of the skb it was about to shift +from was above snd_una, but did not check that the end of the actual +shifted range was above snd_una; this commit adds that check. + +Shifting SACKed ranges below snd_una is problematic because for such +ranges tcp_sacktag_one() short-circuits: it does not declare anything +as SACKed and does not increase sacked_out. + +Before the fixes in commits cc9a672ee522d4805495b98680f4a3db5d0a0af9 +and daef52bab1fd26e24e8e9578f8fb33ba1d0cb412, shifting SACKed ranges +below snd_una happened to work because tcp_shifted_skb() was always +(incorrectly) passing in to tcp_sacktag_one() an skb whose end_seq +tcp_shift_skb_data() had already guaranteed was beyond snd_una. Hence +tcp_sacktag_one() never short-circuited and always increased +tp->sacked_out in this case. 
+ +After those two fixes, my testing has verified that shifting SACKed +ranges below snd_una could cause tp->sacked_out to go negative with +the following sequence of events: + +(1) tcp_shift_skb_data() sees an skb whose end_seq is beyond snd_una, + then shifts a prefix of that skb that is below snd_una + +(2) tcp_shifted_skb() increments the packet count of the + already-SACKed prev sk_buff + +(3) tcp_sacktag_one() sees the end of the new SACKed range is below + snd_una, so it short-circuits and doesn't increase tp->sacked_out + +(5) tcp_clean_rtx_queue() sees the SACKed skb has been ACKed, + decrements tp->sacked_out by this "inflated" pcount that was + missing a matching increase in tp->sacked_out, and hence + tp->sacked_out underflows to a u32 like 0xFFFFFFFF, which casted + to s32 is negative. + +(6) this leads to the warnings seen in the recent "WARNING: at + net/ipv4/tcp_input.c:3418" thread on the netdev list; e.g.: + tcp_input.c:3418 WARN_ON((int)tp->sacked_out < 0); + +More generally, I think this bug can be tickled in some cases where +two or more ACKs from the receiver are lost and then a DSACK arrives +that is immediately above an existing SACKed skb in the write queue. + +This fix changes tcp_shift_skb_data() to abort this sequence at step +(1) in the scenario above by noticing that the bytes are below snd_una +and not shifting them. + +Signed-off-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1567,6 +1567,10 @@ static struct sk_buff *tcp_shift_skb_dat + } + } + ++ /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */ ++ if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) ++ goto fallback; ++ + if (!skb_shift(prev, skb, len)) + goto fallback; + if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) diff --git a/queue-3.0/vmxnet3-fix-transport-header-size.patch b/queue-3.0/vmxnet3-fix-transport-header-size.patch new file mode 100644 index 00000000000..9e802c2055a --- /dev/null +++ b/queue-3.0/vmxnet3-fix-transport-header-size.patch @@ -0,0 +1,54 @@ +From f147c0cb6c951b7ebbcc7ffe402e08d55ef30a2d Mon Sep 17 00:00:00 2001 +From: Shreyas Bhatewara +Date: Tue, 28 Feb 2012 22:17:38 +0000 +Subject: vmxnet3: Fix transport header size + + +From: Shreyas Bhatewara + +[ Upstream commit efead8710aad9e384730ecf25eae0287878840d7 ] + +Fix transport header size + +Fix the transport header size for UDP packets. + +Signed-off-by: Shreyas N Bhatewara +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 7 +------ + drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- + 2 files changed, 3 insertions(+), 8 deletions(-) + +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -830,13 +830,8 @@ vmxnet3_parse_and_copy_hdr(struct sk_buf + ctx->l4_hdr_size = ((struct tcphdr *) + skb_transport_header(skb))->doff * 4; + else if (iph->protocol == IPPROTO_UDP) +- /* +- * Use tcp header size so that bytes to +- * be copied are more than required by +- * the device. 
+- */ + ctx->l4_hdr_size = +- sizeof(struct tcphdr); ++ sizeof(struct udphdr); + else + ctx->l4_hdr_size = 0; + } else { +--- a/drivers/net/vmxnet3/vmxnet3_int.h ++++ b/drivers/net/vmxnet3/vmxnet3_int.h +@@ -69,10 +69,10 @@ + /* + * Version numbers + */ +-#define VMXNET3_DRIVER_VERSION_STRING "1.1.18.0-k" ++#define VMXNET3_DRIVER_VERSION_STRING "1.1.29.0-k" + + /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ +-#define VMXNET3_DRIVER_VERSION_NUM 0x01011200 ++#define VMXNET3_DRIVER_VERSION_NUM 0x01011D00 + + #if defined(CONFIG_PCI_MSI) + /* RSS only makes sense if MSI-X is supported. */ -- 2.47.3