git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Add networking patches to 2.6.25 queue
author Chris Wright <chrisw@sous-sol.org>
Mon, 9 Jun 2008 21:36:09 +0000 (14:36 -0700)
committer Chris Wright <chrisw@sous-sol.org>
Mon, 9 Jun 2008 21:36:09 +0000 (14:36 -0700)
21 files changed:
queue-2.6.25/af_key-fix-selector-family-initialization.patch [new file with mode: 0644]
queue-2.6.25/ax25-fix-null-pointer-dereference-and-lockup.patch [new file with mode: 0644]
queue-2.6.25/bluetooth-fix-locking-bug-in-the-rfcomm-socket-cleanup-handling.patch [new file with mode: 0644]
queue-2.6.25/can-fix-copy_from_user-results-interpretation.patch [new file with mode: 0644]
queue-2.6.25/cassini-only-use-chip-checksum-for-ipv4-packets.patch [new file with mode: 0644]
queue-2.6.25/ipsec-use-the-correct-ip_local_out-function.patch [new file with mode: 0644]
queue-2.6.25/l2tp-avoid-skb-truesize-bug-if-headroom-is-increased.patch [new file with mode: 0644]
queue-2.6.25/l2tp-fix-possible-oops-if-transmitting-or-receiving-when-tunnel-goes-down.patch [new file with mode: 0644]
queue-2.6.25/l2tp-fix-possible-warn_on-from-socket-code-when-udp-socket-is-closed.patch [new file with mode: 0644]
queue-2.6.25/net-fix-call-to-change_rx_flags-in-dev_change_flags.patch [new file with mode: 0644]
queue-2.6.25/net_sched-cls_api-fix-return-value-for-non-existant-classifiers.patch [new file with mode: 0644]
queue-2.6.25/netlink-fix-nla_parse_nested_compat-to-call-nla_parse-directly.patch [new file with mode: 0644]
queue-2.6.25/series
queue-2.6.25/tcp-allow-send-limited-cwnd-to-grow-up-to-max_burst-when-gso-disabled.patch [new file with mode: 0644]
queue-2.6.25/tcp-fix-skb-vs-fack_count-out-of-sync-condition.patch [new file with mode: 0644]
queue-2.6.25/tcp-frto-fix-fallback-to-conventional-recovery.patch [new file with mode: 0644]
queue-2.6.25/tcp-frto-sack-variant-is-errorneously-used-with-newreno.patch [new file with mode: 0644]
queue-2.6.25/tcp-frto-work-around-inorder-receivers.patch [new file with mode: 0644]
queue-2.6.25/tcp-limit-cwnd-growth-when-deferring-for-gso.patch [new file with mode: 0644]
queue-2.6.25/tcp-tcp-connection-times-out-if-icmp-frag-needed-is-delayed.patch [new file with mode: 0644]
queue-2.6.25/vlan-correctly-handle-device-notifications-for-layered-vlan-devices.patch [new file with mode: 0644]

diff --git a/queue-2.6.25/af_key-fix-selector-family-initialization.patch b/queue-2.6.25/af_key-fix-selector-family-initialization.patch
new file mode 100644 (file)
index 0000000..693987e
--- /dev/null
@@ -0,0 +1,34 @@
+From ead8aded50c2a631955fcf0fb6c477c02f7b9e46 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
+Date: Wed, 21 May 2008 13:26:11 -0700
+Subject: af_key: Fix selector family initialization.
+
+From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
+
+[ upstream commit: 4da5105687e0993a3bbdcffd89b2b94d9377faab ]
+
+This propagates the xfrm_user fix made in commit
+bcf0dda8d2408fe1c1040cdec5a98e5fcad2ac72 ("[XFRM]: xfrm_user: fix
+selector family initialization")
+
+Based upon a bug report from, and tested by, Alan Swanson.
+
+Signed-off-by: Kazunori MIYAZAWA <kazunori@miyazawa.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/key/af_key.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/key/af_key.c
++++ b/net/key/af_key.c
+@@ -1219,7 +1219,7 @@ static struct xfrm_state * pfkey_msg2xfr
+               x->sel.prefixlen_s = addr->sadb_address_prefixlen;
+       }
+-      if (x->props.mode == XFRM_MODE_TRANSPORT)
++      if (!x->sel.family)
+               x->sel.family = x->props.family;
+       if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
diff --git a/queue-2.6.25/ax25-fix-null-pointer-dereference-and-lockup.patch b/queue-2.6.25/ax25-fix-null-pointer-dereference-and-lockup.patch
new file mode 100644 (file)
index 0000000..be4ed91
--- /dev/null
@@ -0,0 +1,50 @@
+From 0d8322352083476cb62367887ecf0722549c92f2 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Jarek Poplawski <jarkao2@gmail.com>
+Date: Tue, 3 Jun 2008 14:53:46 -0700
+Subject: ax25: Fix NULL pointer dereference and lockup.
+
+From: Jarek Poplawski <jarkao2@gmail.com>
+
+[ Upstream commit: 7dccf1f4e1696c79bff064c3770867cc53cbc71c ]
+
+There is only one function in AX25 calling skb_append(), and it really
+looks suspicious: appends skb after previously enqueued one, but in
+the meantime this previous skb could be removed from the queue.
+
+This patch fixes it the simple way, so this is not fully compatible with
+the current method, but testing hasn't shown any problems.
+
+Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ax25/ax25_subr.c |   11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+--- a/net/ax25/ax25_subr.c
++++ b/net/ax25/ax25_subr.c
+@@ -64,20 +64,15 @@ void ax25_frames_acked(ax25_cb *ax25, un
+ void ax25_requeue_frames(ax25_cb *ax25)
+ {
+-      struct sk_buff *skb, *skb_prev = NULL;
++      struct sk_buff *skb;
+       /*
+        * Requeue all the un-ack-ed frames on the output queue to be picked
+        * up by ax25_kick called from the timer. This arrangement handles the
+        * possibility of an empty output queue.
+        */
+-      while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL) {
+-              if (skb_prev == NULL)
+-                      skb_queue_head(&ax25->write_queue, skb);
+-              else
+-                      skb_append(skb_prev, skb, &ax25->write_queue);
+-              skb_prev = skb;
+-      }
++      while ((skb = skb_dequeue_tail(&ax25->ack_queue)) != NULL)
++              skb_queue_head(&ax25->write_queue, skb);
+ }
+ /*
diff --git a/queue-2.6.25/bluetooth-fix-locking-bug-in-the-rfcomm-socket-cleanup-handling.patch b/queue-2.6.25/bluetooth-fix-locking-bug-in-the-rfcomm-socket-cleanup-handling.patch
new file mode 100644 (file)
index 0000000..abb3d28
--- /dev/null
@@ -0,0 +1,58 @@
+From 59cec518a8109d2c696210fc6c761174d9b42df9 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Arjan van de Ven <arjan@linux.intel.com>
+Date: Thu, 29 May 2008 01:32:47 -0700
+Subject: bluetooth: fix locking bug in the rfcomm socket cleanup handling
+
+From: Arjan van de Ven <arjan@linux.intel.com>
+
+[ Upstream commit: 7dccf1f4e1696c79bff064c3770867cc53cbc71c ]
+
+in net/bluetooth/rfcomm/sock.c, rfcomm_sk_state_change() does the
+following operation:
+
+        if (parent && sock_flag(sk, SOCK_ZAPPED)) {
+                /* We have to drop DLC lock here, otherwise
+                 * rfcomm_sock_destruct() will dead lock. */
+                rfcomm_dlc_unlock(d);
+                rfcomm_sock_kill(sk);
+                rfcomm_dlc_lock(d);
+        }
+}
+
+which is fine, since rfcomm_sock_kill() will call sk_free() which will call
+rfcomm_sock_destruct() which takes the rfcomm_dlc_lock()... so far so good.
+
+HOWEVER, this assumes that the rfcomm_sk_state_change() function always gets
+called with the rfcomm_dlc_lock() taken. This is the case for all but one
+case, and in that case where we don't have the lock, we do a double unlock
+followed by an attempt to take the lock, which due to underflow isn't
+going anywhere fast.
+
+This patch fixes this by moving the straggling case inside the lock, like
+the other usages of the same call are doing in this code.
+
+This was found with the help of the www.kerneloops.org project, where this
+deadlock was observed 51 times at this point in time:
+http://www.kerneloops.org/search.php?search=rfcomm_sock_destruct
+
+Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
+Acked-by: Marcel Holtmann <marcel@holtmann.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/bluetooth/rfcomm/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/bluetooth/rfcomm/core.c
++++ b/net/bluetooth/rfcomm/core.c
+@@ -423,8 +423,8 @@ static int __rfcomm_dlc_close(struct rfc
+               rfcomm_dlc_lock(d);
+               d->state = BT_CLOSED;
+-              rfcomm_dlc_unlock(d);
+               d->state_change(d, err);
++              rfcomm_dlc_unlock(d);
+               skb_queue_purge(&d->tx_queue);
+               rfcomm_dlc_unlink(d);
diff --git a/queue-2.6.25/can-fix-copy_from_user-results-interpretation.patch b/queue-2.6.25/can-fix-copy_from_user-results-interpretation.patch
new file mode 100644 (file)
index 0000000..3f4c756
--- /dev/null
@@ -0,0 +1,77 @@
+From 3966365cee3c6322936248050eddb10f765b2032 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Sam Ravnborg <sam@ravnborg.org>
+Date: Mon, 9 Jun 2008 11:22:01 -0700
+Subject: can: Fix copy_from_user() results interpretation
+
+From: Sam Ravnborg <sam@ravnborg.org>
+
+[ Upstream commit: 3f91bd420a955803421f2db17b2e04aacfbb2bb8 ]
+
+Both copy_to_user() and copy_from_user() return the number of bytes that
+failed to reach their destination, not 0/-EXXX values.
+
+Based on patch from Pavel Emelyanov <xemul@openvz.org>
+
+Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
+Acked-by: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/can/raw.c |   21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/net/can/raw.c
++++ b/net/can/raw.c
+@@ -435,15 +435,13 @@ static int raw_setsockopt(struct socket 
+                       if (!filter)
+                               return -ENOMEM;
+-                      err = copy_from_user(filter, optval, optlen);
+-                      if (err) {
++                      if (copy_from_user(filter, optval, optlen)) {
+                               kfree(filter);
+-                              return err;
++                              return -EFAULT;
+                       }
+               } else if (count == 1) {
+-                      err = copy_from_user(&sfilter, optval, optlen);
+-                      if (err)
+-                              return err;
++                      if (copy_from_user(&sfilter, optval, optlen))
++                              return -EFAULT;
+               }
+               lock_sock(sk);
+@@ -493,9 +491,8 @@ static int raw_setsockopt(struct socket 
+               if (optlen != sizeof(err_mask))
+                       return -EINVAL;
+-              err = copy_from_user(&err_mask, optval, optlen);
+-              if (err)
+-                      return err;
++              if (copy_from_user(&err_mask, optval, optlen))
++                      return -EFAULT;
+               err_mask &= CAN_ERR_MASK;
+@@ -531,7 +528,8 @@ static int raw_setsockopt(struct socket 
+               if (optlen != sizeof(ro->loopback))
+                       return -EINVAL;
+-              err = copy_from_user(&ro->loopback, optval, optlen);
++              if (copy_from_user(&ro->loopback, optval, optlen))
++                      return -EFAULT;
+               break;
+@@ -539,7 +537,8 @@ static int raw_setsockopt(struct socket 
+               if (optlen != sizeof(ro->recv_own_msgs))
+                       return -EINVAL;
+-              err = copy_from_user(&ro->recv_own_msgs, optval, optlen);
++              if (copy_from_user(&ro->recv_own_msgs, optval, optlen))
++                      return -EFAULT;
+               break;
diff --git a/queue-2.6.25/cassini-only-use-chip-checksum-for-ipv4-packets.patch b/queue-2.6.25/cassini-only-use-chip-checksum-for-ipv4-packets.patch
new file mode 100644 (file)
index 0000000..1f2ced6
--- /dev/null
@@ -0,0 +1,54 @@
+From 3be7c5ab93cf875aff5d91974d1df0851c329fd7 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: David S. Miller <davem@davemloft.net>
+Date: Wed, 21 May 2008 17:05:34 -0700
+Subject: cassini: Only use chip checksum for ipv4 packets.
+
+From: David S. Miller <davem@davemloft.net>
+
+[ upstream commit: b1443e2f6501f06930a162ff1ff08382a98bf23e ]
+
+According to David Monro, at least with Natsemi Saturn chips the
+cassini driver has some trouble with ipv6 checksums.
+
+Until we have more information about what's going on here, only
+use the chip checksums for ipv4.
+
+This workaround was suggested and tested by David.
+
+Update version and release date.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ drivers/net/cassini.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/cassini.c
++++ b/drivers/net/cassini.c
+@@ -142,8 +142,8 @@
+ #define DRV_MODULE_NAME               "cassini"
+ #define PFX DRV_MODULE_NAME   ": "
+-#define DRV_MODULE_VERSION    "1.5"
+-#define DRV_MODULE_RELDATE    "4 Jan 2008"
++#define DRV_MODULE_VERSION    "1.6"
++#define DRV_MODULE_RELDATE    "21 May 2008"
+ #define CAS_DEF_MSG_ENABLE      \
+       (NETIF_MSG_DRV          | \
+@@ -2140,9 +2140,12 @@ end_copy_pkt:
+               if (addr)
+                       cas_page_unmap(addr);
+       }
+-      skb->csum = csum_unfold(~csum);
+-      skb->ip_summed = CHECKSUM_COMPLETE;
+       skb->protocol = eth_type_trans(skb, cp->dev);
++      if (skb->protocol == htons(ETH_P_IP)) {
++              skb->csum = csum_unfold(~csum);
++              skb->ip_summed = CHECKSUM_COMPLETE;
++      } else
++              skb->ip_summed = CHECKSUM_NONE;
+       return len;
+ }
diff --git a/queue-2.6.25/ipsec-use-the-correct-ip_local_out-function.patch b/queue-2.6.25/ipsec-use-the-correct-ip_local_out-function.patch
new file mode 100644 (file)
index 0000000..fbf72f9
--- /dev/null
@@ -0,0 +1,54 @@
+From 2c2864845e1348c1e04919130c3152d60301fee2 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 20 May 2008 14:32:14 -0700
+Subject: ipsec: Use the correct ip_local_out function
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ upstream commit: 1ac06e0306d0192a7a4d9ea1c9e06d355ce7e7d3 ]
+
+Because the IPsec output function xfrm_output_resume does its
+own dst_output call it should always call __ip_local_out
+instead of ip_local_out as the latter may invoke dst_output
+directly.  Otherwise the return values from nf_hook and dst_output
+may clash as they both use the value 1 but for different purposes.
+
+When that clash occurs this can cause a packet to be used after
+it has been freed which usually leads to a crash.  Because the
+offending value is only returned from dst_output with qdiscs
+such as HTB, this bug is normally not visible.
+
+Thanks to Marco Berizzi for his perseverance in tracking this
+down.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv4/route.c |    2 +-
+ net/ipv6/route.c |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -162,7 +162,7 @@ static struct dst_ops ipv4_dst_ops = {
+       .negative_advice =      ipv4_negative_advice,
+       .link_failure =         ipv4_link_failure,
+       .update_pmtu =          ip_rt_update_pmtu,
+-      .local_out =            ip_local_out,
++      .local_out =            __ip_local_out,
+       .entry_size =           sizeof(struct rtable),
+       .entries =              ATOMIC_INIT(0),
+ };
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -105,7 +105,7 @@ static struct dst_ops ip6_dst_ops = {
+       .negative_advice        =       ip6_negative_advice,
+       .link_failure           =       ip6_link_failure,
+       .update_pmtu            =       ip6_rt_update_pmtu,
+-      .local_out              =       ip6_local_out,
++      .local_out              =       __ip6_local_out,
+       .entry_size             =       sizeof(struct rt6_info),
+       .entries                =       ATOMIC_INIT(0),
+ };
diff --git a/queue-2.6.25/l2tp-avoid-skb-truesize-bug-if-headroom-is-increased.patch b/queue-2.6.25/l2tp-avoid-skb-truesize-bug-if-headroom-is-increased.patch
new file mode 100644 (file)
index 0000000..200cdf3
--- /dev/null
@@ -0,0 +1,75 @@
+From 6ea010d4b9ba1e701f170ad769df8764876f1fd5 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: James Chapman <jchapman@katalix.com>
+Date: Mon, 19 May 2008 14:10:01 -0700
+Subject: l2tp: avoid skb truesize bug if headroom is increased
+
+From: James Chapman <jchapman@katalix.com>
+
+[ upstream commit: 090c48d3dd5ea90b37350334aaed9a93b0c1e0a1 ]
+
+A user reported seeing occasional bugs such as the following when
+using the L2TP driver.
+
+  SKB BUG: Invalid truesize (272) len=72, sizeof(sk_buff)=208
+
+When L2TP adds its header in the transmit path, it might need to
+increase the headroom of the skb. In some cases, the increased
+headroom trips a kernel bug when the skb is freed because the skb has
+grown beyond its truesize value. The fix is to increase the truesize
+by the amount of headroom added, after orphaning the skb.
+
+While here, fix a misleading comment.
+
+Thanks to Iouri Kharon <bc-info@styx.cabel.net> for the initial
+report and testing the fix.
+
+Signed-off-by: James Chapman <jchapman@katalix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ drivers/net/pppol2tp.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/pppol2tp.c
++++ b/drivers/net/pppol2tp.c
+@@ -980,6 +980,8 @@ static int pppol2tp_xmit(struct ppp_chan
+       __wsum csum = 0;
+       struct udphdr *uh;
+       unsigned int len;
++      int old_headroom;
++      int new_headroom;
+       if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
+               goto abort;
+@@ -1001,16 +1003,18 @@ static int pppol2tp_xmit(struct ppp_chan
+       /* Check that there's enough headroom in the skb to insert IP,
+        * UDP and L2TP and PPP headers. If not enough, expand it to
+-       * make room. Note that a new skb (or a clone) is
+-       * allocated. If we return an error from this point on, make
+-       * sure we free the new skb but do not free the original skb
+-       * since that is done by the caller for the error case.
++       * make room. Adjust truesize.
+        */
+       headroom = NET_SKB_PAD + sizeof(struct iphdr) +
+               sizeof(struct udphdr) + hdr_len + sizeof(ppph);
++      old_headroom = skb_headroom(skb);
+       if (skb_cow_head(skb, headroom))
+               goto abort;
++      new_headroom = skb_headroom(skb);
++      skb_orphan(skb);
++      skb->truesize += new_headroom - old_headroom;
++
+       /* Setup PPP header */
+       __skb_push(skb, sizeof(ppph));
+       skb->data[0] = ppph[0];
+@@ -1065,7 +1069,6 @@ static int pppol2tp_xmit(struct ppp_chan
+       /* Get routing info from the tunnel socket */
+       dst_release(skb->dst);
+       skb->dst = dst_clone(__sk_dst_get(sk_tun));
+-      skb_orphan(skb);
+       skb->sk = sk_tun;
+       /* Queue the packet to IP for output */
diff --git a/queue-2.6.25/l2tp-fix-possible-oops-if-transmitting-or-receiving-when-tunnel-goes-down.patch b/queue-2.6.25/l2tp-fix-possible-oops-if-transmitting-or-receiving-when-tunnel-goes-down.patch
new file mode 100644 (file)
index 0000000..525290b
--- /dev/null
@@ -0,0 +1,323 @@
+From b3e1a39be32ec30dacfa545b4e019180c390d141 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: James Chapman <jchapman@katalix.com>
+Date: Mon, 9 Jun 2008 13:35:41 -0700
+Subject: l2tp: Fix possible oops if transmitting or receiving when tunnel goes down
+
+From: James Chapman <jchapman@katalix.com>
+
+[ upstream commit: 24b95685ffcdb3dc28f64b9e8af6ea3e8360fbc5 ]
+
+Some problems have been experienced in the field which cause an oops
+in the pppol2tp driver if L2TP tunnels fail while passing data.
+
+The pppol2tp driver uses private data that is referenced via the
+sk->sk_user_data of its UDP and PPPoL2TP sockets. This patch makes
+sure that the driver uses sock_hold() when it holds a reference to the
+sk pointer. This affects its sendmsg(), recvmsg(), getname(),
+[gs]etsockopt() and ioctl() handlers.
+
+Tested by ISP where problem was seen. System has been up 10 days with
+no oops since running this patch. Without the patch, an oops would
+occur every 1-2 days.
+
+Signed-off-by: James Chapman <jchapman@katalix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ drivers/net/pppol2tp.c |  101 +++++++++++++++++++++++++++++++++++++------------
+ 1 file changed, 78 insertions(+), 23 deletions(-)
+
+--- a/drivers/net/pppol2tp.c
++++ b/drivers/net/pppol2tp.c
+@@ -240,12 +240,15 @@ static inline struct pppol2tp_session *p
+       if (sk == NULL)
+               return NULL;
++      sock_hold(sk);
+       session = (struct pppol2tp_session *)(sk->sk_user_data);
+-      if (session == NULL)
+-              return NULL;
++      if (session == NULL) {
++              sock_put(sk);
++              goto out;
++      }
+       BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+-
++out:
+       return session;
+ }
+@@ -256,12 +259,15 @@ static inline struct pppol2tp_tunnel *pp
+       if (sk == NULL)
+               return NULL;
++      sock_hold(sk);
+       tunnel = (struct pppol2tp_tunnel *)(sk->sk_user_data);
+-      if (tunnel == NULL)
+-              return NULL;
++      if (tunnel == NULL) {
++              sock_put(sk);
++              goto out;
++      }
+       BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+-
++out:
+       return tunnel;
+ }
+@@ -716,12 +722,14 @@ discard:
+       session->stats.rx_errors++;
+       kfree_skb(skb);
+       sock_put(session->sock);
++      sock_put(sock);
+       return 0;
+ error:
+       /* Put UDP header back */
+       __skb_push(skb, sizeof(struct udphdr));
++      sock_put(sock);
+ no_tunnel:
+       return 1;
+@@ -745,10 +753,13 @@ static int pppol2tp_udp_encap_recv(struc
+              "%s: received %d bytes\n", tunnel->name, skb->len);
+       if (pppol2tp_recv_core(sk, skb))
+-              goto pass_up;
++              goto pass_up_put;
++      sock_put(sk);
+       return 0;
++pass_up_put:
++      sock_put(sk);
+ pass_up:
+       return 1;
+ }
+@@ -858,7 +869,7 @@ static int pppol2tp_sendmsg(struct kiocb
+       tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+       if (tunnel == NULL)
+-              goto error;
++              goto error_put_sess;
+       /* What header length is configured for this session? */
+       hdr_len = pppol2tp_l2tp_header_len(session);
+@@ -870,7 +881,7 @@ static int pppol2tp_sendmsg(struct kiocb
+                          sizeof(ppph) + total_len,
+                          0, GFP_KERNEL);
+       if (!skb)
+-              goto error;
++              goto error_put_sess_tun;
+       /* Reserve space for headers. */
+       skb_reserve(skb, NET_SKB_PAD);
+@@ -900,7 +911,7 @@ static int pppol2tp_sendmsg(struct kiocb
+       error = memcpy_fromiovec(skb->data, m->msg_iov, total_len);
+       if (error < 0) {
+               kfree_skb(skb);
+-              goto error;
++              goto error_put_sess_tun;
+       }
+       skb_put(skb, total_len);
+@@ -947,10 +958,33 @@ static int pppol2tp_sendmsg(struct kiocb
+               session->stats.tx_errors++;
+       }
++      return error;
++
++error_put_sess_tun:
++      sock_put(session->tunnel_sock);
++error_put_sess:
++      sock_put(sk);
+ error:
+       return error;
+ }
++/* Automatically called when the skb is freed.
++ */
++static void pppol2tp_sock_wfree(struct sk_buff *skb)
++{
++      sock_put(skb->sk);
++}
++
++/* For data skbs that we transmit, we associate with the tunnel socket
++ * but don't do accounting.
++ */
++static inline void pppol2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
++{
++      sock_hold(sk);
++      skb->sk = sk;
++      skb->destructor = pppol2tp_sock_wfree;
++}
++
+ /* Transmit function called by generic PPP driver.  Sends PPP frame
+  * over PPPoL2TP socket.
+  *
+@@ -993,10 +1027,10 @@ static int pppol2tp_xmit(struct ppp_chan
+       sk_tun = session->tunnel_sock;
+       if (sk_tun == NULL)
+-              goto abort;
++              goto abort_put_sess;
+       tunnel = pppol2tp_sock_to_tunnel(sk_tun);
+       if (tunnel == NULL)
+-              goto abort;
++              goto abort_put_sess;
+       /* What header length is configured for this session? */
+       hdr_len = pppol2tp_l2tp_header_len(session);
+@@ -1009,7 +1043,7 @@ static int pppol2tp_xmit(struct ppp_chan
+               sizeof(struct udphdr) + hdr_len + sizeof(ppph);
+       old_headroom = skb_headroom(skb);
+       if (skb_cow_head(skb, headroom))
+-              goto abort;
++              goto abort_put_sess_tun;
+       new_headroom = skb_headroom(skb);
+       skb_orphan(skb);
+@@ -1069,7 +1103,7 @@ static int pppol2tp_xmit(struct ppp_chan
+       /* Get routing info from the tunnel socket */
+       dst_release(skb->dst);
+       skb->dst = dst_clone(__sk_dst_get(sk_tun));
+-      skb->sk = sk_tun;
++      pppol2tp_skb_set_owner_w(skb, sk_tun);
+       /* Queue the packet to IP for output */
+       len = skb->len;
+@@ -1086,8 +1120,14 @@ static int pppol2tp_xmit(struct ppp_chan
+               session->stats.tx_errors++;
+       }
++      sock_put(sk_tun);
++      sock_put(sk);
+       return 1;
++abort_put_sess_tun:
++      sock_put(sk_tun);
++abort_put_sess:
++      sock_put(sk);
+ abort:
+       /* Free the original skb */
+       kfree_skb(skb);
+@@ -1191,7 +1231,7 @@ static void pppol2tp_tunnel_destruct(str
+ {
+       struct pppol2tp_tunnel *tunnel;
+-      tunnel = pppol2tp_sock_to_tunnel(sk);
++      tunnel = sk->sk_user_data;
+       if (tunnel == NULL)
+               goto end;
+@@ -1230,10 +1270,12 @@ static void pppol2tp_session_destruct(st
+       if (sk->sk_user_data != NULL) {
+               struct pppol2tp_tunnel *tunnel;
+-              session = pppol2tp_sock_to_session(sk);
++              session = sk->sk_user_data;
+               if (session == NULL)
+                       goto out;
++              BUG_ON(session->magic != L2TP_SESSION_MAGIC);
++
+               /* Don't use pppol2tp_sock_to_tunnel() here to
+                * get the tunnel context because the tunnel
+                * socket might have already been closed (its
+@@ -1611,7 +1653,7 @@ static int pppol2tp_connect(struct socke
+       error = ppp_register_channel(&po->chan);
+       if (error)
+-              goto end;
++              goto end_put_tun;
+       /* This is how we get the session context from the socket. */
+       sk->sk_user_data = session;
+@@ -1631,6 +1673,8 @@ out_no_ppp:
+       PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+              "%s: created\n", session->name);
++end_put_tun:
++      sock_put(tunnel_sock);
+ end:
+       release_sock(sk);
+@@ -1671,6 +1715,7 @@ static int pppol2tp_getname(struct socke
+       *usockaddr_len = len;
+       error = 0;
++      sock_put(sock->sk);
+ end:
+       return error;
+@@ -1909,14 +1954,17 @@ static int pppol2tp_ioctl(struct socket 
+               err = -EBADF;
+               tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+               if (tunnel == NULL)
+-                      goto end;
++                      goto end_put_sess;
+               err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
+-              goto end;
++              sock_put(session->tunnel_sock);
++              goto end_put_sess;
+       }
+       err = pppol2tp_session_ioctl(session, cmd, arg);
++end_put_sess:
++      sock_put(sk);
+ end:
+       return err;
+ }
+@@ -2062,14 +2110,17 @@ static int pppol2tp_setsockopt(struct so
+               err = -EBADF;
+               tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+               if (tunnel == NULL)
+-                      goto end;
++                      goto end_put_sess;
+               err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
++              sock_put(session->tunnel_sock);
+       } else
+               err = pppol2tp_session_setsockopt(sk, session, optname, val);
+       err = 0;
++end_put_sess:
++      sock_put(sk);
+ end:
+       return err;
+ }
+@@ -2184,20 +2235,24 @@ static int pppol2tp_getsockopt(struct so
+               err = -EBADF;
+               tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+               if (tunnel == NULL)
+-                      goto end;
++                      goto end_put_sess;
+               err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
++              sock_put(session->tunnel_sock);
+       } else
+               err = pppol2tp_session_getsockopt(sk, session, optname, &val);
+       err = -EFAULT;
+       if (put_user(len, (int __user *) optlen))
+-              goto end;
++              goto end_put_sess;
+       if (copy_to_user((void __user *) optval, &val, len))
+-              goto end;
++              goto end_put_sess;
+       err = 0;
++
++end_put_sess:
++      sock_put(sk);
+ end:
+       return err;
+ }
diff --git a/queue-2.6.25/l2tp-fix-possible-warn_on-from-socket-code-when-udp-socket-is-closed.patch b/queue-2.6.25/l2tp-fix-possible-warn_on-from-socket-code-when-udp-socket-is-closed.patch
new file mode 100644 (file)
index 0000000..6db929b
--- /dev/null
@@ -0,0 +1,66 @@
+From 6000afc70ddd62a24cf3aa636b066f518157998c Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: James Chapman <jchapman@katalix.com>
+Date: Mon, 9 Jun 2008 13:34:39 -0700
+Subject: l2tp: Fix possible WARN_ON from socket code when UDP socket is closed
+
+From: James Chapman <jchapman@katalix.com>
+
+[ upstream commit: 199f7d24ae59894243687a234a909f44a8724506 ]
+
+If an L2TP daemon closes a tunnel socket while packets are queued in
+the tunnel's reorder queue, a kernel warning is logged because the
+socket is closed while skbs are still referencing it. The fix is to
+purge the queue in the socket's release handler.
+
+WARNING: at include/net/sock.h:351 udp_lib_unhash+0x41/0x68()
+Pid: 12998, comm: openl2tpd Not tainted 2.6.25 #8
+ [<c0423c58>] warn_on_slowpath+0x41/0x51
+ [<c05d33a7>] udp_lib_unhash+0x41/0x68
+ [<c059424d>] sk_common_release+0x23/0x90
+ [<c05d16be>] udp_lib_close+0x8/0xa
+ [<c05d8684>] inet_release+0x42/0x48
+ [<c0592599>] sock_release+0x14/0x60
+ [<c059299f>] sock_close+0x29/0x30
+ [<c046ef52>] __fput+0xad/0x15b
+ [<c046f1d9>] fput+0x17/0x19
+ [<c046c8c4>] filp_close+0x50/0x5a
+ [<c046da06>] sys_close+0x69/0x9f
+ [<c04048ce>] syscall_call+0x7/0xb
+
+Signed-off-by: James Chapman <jchapman@katalix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ drivers/net/pppol2tp.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/drivers/net/pppol2tp.c
++++ b/drivers/net/pppol2tp.c
+@@ -1279,6 +1279,7 @@ out:
+ static int pppol2tp_release(struct socket *sock)
+ {
+       struct sock *sk = sock->sk;
++      struct pppol2tp_session *session;
+       int error;
+       if (!sk)
+@@ -1296,9 +1297,18 @@ static int pppol2tp_release(struct socke
+       sock_orphan(sk);
+       sock->sk = NULL;
++      session = pppol2tp_sock_to_session(sk);
++
+       /* Purge any queued data */
+       skb_queue_purge(&sk->sk_receive_queue);
+       skb_queue_purge(&sk->sk_write_queue);
++      if (session != NULL) {
++              struct sk_buff *skb;
++              while ((skb = skb_dequeue(&session->reorder_q))) {
++                      kfree_skb(skb);
++                      sock_put(sk);
++              }
++      }
+       release_sock(sk);
diff --git a/queue-2.6.25/net-fix-call-to-change_rx_flags-in-dev_change_flags.patch b/queue-2.6.25/net-fix-call-to-change_rx_flags-in-dev_change_flags.patch
new file mode 100644 (file)
index 0000000..273cc57
--- /dev/null
@@ -0,0 +1,36 @@
+From b9b704db3c0cde4a25b85501d2c9d650d8be9e1d Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: David Woodhouse <dwmw2@infradead.org>
+Date: Tue, 20 May 2008 14:36:14 -0700
+Subject: net: Fix call to ->change_rx_flags(dev, IFF_MULTICAST) in dev_change_flags()
+
+From: David Woodhouse <dwmw2@infradead.org>
+
+[ upstream commit: 0e91796eb46e29edc791131c832a2232bcaed9dd ]
+
+Am I just being particularly dim today, or can the call to
+dev->change_rx_flags(dev, IFF_MULTICAST) in dev_change_flags() never
+happen?
+
+We've just set dev->flags = flags & IFF_MULTICAST, effectively. So the
+condition '(dev->flags ^ flags) & IFF_MULTICAST' is _never_ going to be
+true.
+
+Signed-off-by: David Woodhouse <dwmw2@infradead.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/core/dev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3132,7 +3132,7 @@ int dev_change_flags(struct net_device *
+        *      Load in the correct multicast list now the flags have changed.
+        */
+-      if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
++      if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
+               dev->change_rx_flags(dev, IFF_MULTICAST);
+       dev_set_rx_mode(dev);
diff --git a/queue-2.6.25/net_sched-cls_api-fix-return-value-for-non-existant-classifiers.patch b/queue-2.6.25/net_sched-cls_api-fix-return-value-for-non-existant-classifiers.patch
new file mode 100644 (file)
index 0000000..df6dd34
--- /dev/null
@@ -0,0 +1,31 @@
+From 44a02f11d1d7f26e1ae811009b7ef5a657c2056d Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Patrick McHardy <kaber@trash.net>
+Date: Tue, 20 May 2008 14:34:46 -0700
+Subject: net_sched: cls_api: fix return value for non-existant classifiers
+
+From: Patrick McHardy <kaber@trash.net>
+
+[ upstream commit: f2df824948d559ea818e03486a8583e42ea6ab37 ]
+
+cls_api should return ENOENT when the requested classifier doesn't
+exist.
+
+Signed-off-by: Patrick McHardy <kaber@trash.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/sched/cls_api.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -220,7 +220,7 @@ replay:
+               tp = kzalloc(sizeof(*tp), GFP_KERNEL);
+               if (tp == NULL)
+                       goto errout;
+-              err = -EINVAL;
++              err = -ENOENT;
+               tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
+               if (tp_ops == NULL) {
+ #ifdef CONFIG_KMOD
diff --git a/queue-2.6.25/netlink-fix-nla_parse_nested_compat-to-call-nla_parse-directly.patch b/queue-2.6.25/netlink-fix-nla_parse_nested_compat-to-call-nla_parse-directly.patch
new file mode 100644 (file)
index 0000000..42719dc
--- /dev/null
@@ -0,0 +1,50 @@
+From f48a77e02822767a51915454a6cc5feba39e0c53 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Thomas Graf <tgraf@suug.ch>
+Date: Thu, 22 May 2008 10:48:59 -0700
+Subject: netlink: Fix nla_parse_nested_compat() to call nla_parse() directly
+
+From: Thomas Graf <tgraf@suug.ch>
+
+[ upstream commit: b9a2f2e450b0f770bb4347ae8d48eb2dea701e24 ]
+
+The purpose of nla_parse_nested_compat() is to parse attributes which
+contain a struct followed by a stream of nested attributes.  So far,
+it called nla_parse_nested() to parse the stream of nested attributes
+which was wrong, as nla_parse_nested() expects a container attribute
+as data which holds the attribute stream.  It needs to call
+nla_parse() directly while pointing at the next possible alignment
+point after the struct in the beginning of the attribute.
+
+With this patch, I can no longer reproduce the reported leftover
+warnings.
+
+Signed-off-by: Thomas Graf <tgraf@suug.ch>
+Acked-by: Patrick McHardy <kaber@trash.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ include/net/netlink.h |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/include/net/netlink.h
++++ b/include/net/netlink.h
+@@ -772,12 +772,13 @@ static inline int __nla_parse_nested_com
+                                           const struct nla_policy *policy,
+                                           int len)
+ {
+-      if (nla_len(nla) < len)
++      int nested_len = nla_len(nla) - NLA_ALIGN(len);
++
++      if (nested_len < 0)
+               return -1;
+-      if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr))
+-              return nla_parse_nested(tb, maxtype,
+-                                      nla_data(nla) + NLA_ALIGN(len),
+-                                      policy);
++      if (nested_len >= nla_attr_size(0))
++              return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
++                               nested_len, policy);
+       memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+       return 0;
+ }
index 58213d7b57b8ca861b4605a5a255cc3f510716e8..a9610071ab577fa1a6d52ffc256a9da89eee6fb3 100644 (file)
@@ -11,3 +11,23 @@ ecryptfs-clean-up-lock_parent.patch
 ecryptfs-fix-missed-mutex_unlock.patch
 ps3-fix-frame-buffer-build-error.patch
 sunhv-fix-locking-in-non-paged-i-o-case.patch
+af_key-fix-selector-family-initialization.patch
+ax25-fix-null-pointer-dereference-and-lockup.patch
+bluetooth-fix-locking-bug-in-the-rfcomm-socket-cleanup-handling.patch
+can-fix-copy_from_user-results-interpretation.patch
+cassini-only-use-chip-checksum-for-ipv4-packets.patch
+net-fix-call-to-change_rx_flags-in-dev_change_flags.patch
+net_sched-cls_api-fix-return-value-for-non-existant-classifiers.patch
+ipsec-use-the-correct-ip_local_out-function.patch
+netlink-fix-nla_parse_nested_compat-to-call-nla_parse-directly.patch
+l2tp-avoid-skb-truesize-bug-if-headroom-is-increased.patch
+vlan-correctly-handle-device-notifications-for-layered-vlan-devices.patch
+tcp-tcp-connection-times-out-if-icmp-frag-needed-is-delayed.patch
+tcp-allow-send-limited-cwnd-to-grow-up-to-max_burst-when-gso-disabled.patch
+tcp-limit-cwnd-growth-when-deferring-for-gso.patch
+l2tp-fix-possible-warn_on-from-socket-code-when-udp-socket-is-closed.patch
+l2tp-fix-possible-oops-if-transmitting-or-receiving-when-tunnel-goes-down.patch
+tcp-fix-skb-vs-fack_count-out-of-sync-condition.patch
+tcp-frto-fix-fallback-to-conventional-recovery.patch
+tcp-frto-sack-variant-is-errorneously-used-with-newreno.patch
+tcp-frto-work-around-inorder-receivers.patch
diff --git a/queue-2.6.25/tcp-allow-send-limited-cwnd-to-grow-up-to-max_burst-when-gso-disabled.patch b/queue-2.6.25/tcp-allow-send-limited-cwnd-to-grow-up-to-max_burst-when-gso-disabled.patch
new file mode 100644 (file)
index 0000000..80f4a03
--- /dev/null
@@ -0,0 +1,42 @@
+From 94f3671135bdc0d573dd4f7731dda9072b25a23a Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: John Heffner <johnwheffner@gmail.com>
+Date: Tue, 29 Apr 2008 03:13:02 -0700
+Subject: tcp: Allow send-limited cwnd to grow up to max_burst when gso disabled
+
+From: John Heffner <johnwheffner@gmail.com>
+
+[ upstream commit: ce447eb91409225f8a488f6b7b2a1bdf7b2d884f ]
+
+This changes the logic in tcp_is_cwnd_limited() so that cwnd may grow
+up to tcp_max_burst() even when sk_can_gso() is false, or when
+sysctl_tcp_tso_win_divisor != 0.
+
+Signed-off-by: John Heffner <johnwheffner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv4/tcp_cong.c |   11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/net/ipv4/tcp_cong.c
++++ b/net/ipv4/tcp_cong.c
+@@ -285,14 +285,11 @@ int tcp_is_cwnd_limited(const struct soc
+       if (in_flight >= tp->snd_cwnd)
+               return 1;
+-      if (!sk_can_gso(sk))
+-              return 0;
+-
+       left = tp->snd_cwnd - in_flight;
+-      if (sysctl_tcp_tso_win_divisor)
+-              return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd;
+-      else
+-              return left <= tcp_max_burst(tp);
++      if (sk_can_gso(sk) &&
++          left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd)
++              return 1;
++      return left <= tcp_max_burst(tp);
+ }
+ EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
diff --git a/queue-2.6.25/tcp-fix-skb-vs-fack_count-out-of-sync-condition.patch b/queue-2.6.25/tcp-fix-skb-vs-fack_count-out-of-sync-condition.patch
new file mode 100644 (file)
index 0000000..b5ebcb9
--- /dev/null
@@ -0,0 +1,70 @@
+From 623af2c3a3cd84450306051a8fcba0a962868942 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Date: Wed, 4 Jun 2008 12:07:44 -0700
+Subject: tcp: fix skb vs fack_count out-of-sync condition
+
+From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+
+[ upstream commit: a6604471db5e7a33474a7f16c64d6b118fae3e74 ]
+
+This bug is able to corrupt fackets_out in very rare cases.
+In order for this to cause corruption:
+  1) DSACK in the middle of previous SACK block must be generated.
+  2) In order to take that particular branch, part or all of the
+     DSACKed segment must already be SACKed so that we have that
+     in cache in the first place.
+  3) The new info must be top enough so that fackets_out will be
+     updated on this iteration.
+...then fack_count is updated while skb wasn't, then we walk again
+that particular segment thus updating fack_count twice for
+a single skb and finally that value is assigned to fackets_out
+by tcp_sacktag_one.
+
+It is safe to call tcp_sacktag_one just once for a segment (at
+DSACK), no need to call again for plain SACK.
+
+Potential problems of the miscount are limited to premature entry
+to recovery and to inflated reordering metric (which could even
+cancel each other out in the luckiest scenarios :-)).
+Both are quite insignificant in worst case too and there exists
+also code to reset them (fackets_out once sacked_out becomes zero
+and reordering metric on RTO).
+
+This has been reported by a number of people, because it occurred
+quite rarely, it has been very evasive. Andy Furniss was able to
+get it to occur couple of times so that a bit more info was
+collected about the problem using a debug patch, though it still
+required lot of checking around. Thanks also to others who have
+tried to help here.
+
+This is listed as Bugzilla #10346. The bug was introduced by
+me in commit 68f8353b48 ([TCP]: Rewrite SACK block processing &
+sack_recv_cache use), I probably thought back then that there's
+need to scan that entry twice or didn't dare to make it go
+through it just once there. Going through twice would have
+required restoring fack_count after the walk but as noted above,
+I chose to drop the additional walk step altogether here.
+
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv4/tcp_input.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1393,9 +1393,9 @@ static struct sk_buff *tcp_maybe_skippin
+       if (before(next_dup->start_seq, skip_to_seq)) {
+               skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
+-              tcp_sacktag_walk(skb, sk, NULL,
+-                               next_dup->start_seq, next_dup->end_seq,
+-                               1, fack_count, reord, flag);
++              skb = tcp_sacktag_walk(skb, sk, NULL,
++                                   next_dup->start_seq, next_dup->end_seq,
++                                   1, fack_count, reord, flag);
+       }
+       return skb;
diff --git a/queue-2.6.25/tcp-frto-fix-fallback-to-conventional-recovery.patch b/queue-2.6.25/tcp-frto-fix-fallback-to-conventional-recovery.patch
new file mode 100644 (file)
index 0000000..981edc6
--- /dev/null
@@ -0,0 +1,56 @@
+From ab453ba3be3db9b46c8f37c0bf55d6b6811a2b78 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Date: Tue, 13 May 2008 02:53:26 -0700
+Subject: tcp FRTO: Fix fallback to conventional recovery
+
+From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+
+[ upstream commit: a1c1f281b84a751fdb5ff919da3b09df7297619f ]
+
+It seems that commit 009a2e3e4ec ("[TCP] FRTO: Improve
+interoperability with other undo_marker users") ran into
+another land-mine which caused fallback to conventional
+recovery to break:
+
+1. Cumulative ACK arrives after FRTO retransmission
+2. tcp_try_to_open sees zero retrans_out, clears retrans_stamp
+   which should be kept like in CA_Loss state it would be
+3. undo_marker change allowed tcp_packet_delayed to return
+   true because of the cleared retrans_stamp once FRTO is
+   terminated causing LossUndo to occur, which means all loss
+   markings FRTO made are reverted.
+
+This means that the conventional recovery basically recovered
+one loss per RTT, which is not that efficient. It was quite
+unobvious that the undo_marker change broke something like
+this, I had a quite long session to track it down because of
+the non-intuitiveness of the bug (luckily I had a trivial
+reproducer at hand and I was also able to learn to use kprobes
+in the process as well :-)).
+
+This, together with the NewReno+FRTO fix and the FRTO in-order
+workaround, fixes Damon's problems; this and the first
+mentioned are enough to fix Bugzilla #10063.
+
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Reported-by: Damon L. Chesser <damon@damtek.com>
+Tested-by: Damon L. Chesser <damon@damtek.com>
+Tested-by: Sebastian Hyrwall <zibbe@cisko.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv4/tcp_input.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2465,7 +2465,7 @@ static void tcp_try_to_open(struct sock 
+       tcp_verify_left_out(tp);
+-      if (tp->retrans_out == 0)
++      if (!tp->frto_counter && tp->retrans_out == 0)
+               tp->retrans_stamp = 0;
+       if (flag & FLAG_ECE)
diff --git a/queue-2.6.25/tcp-frto-sack-variant-is-errorneously-used-with-newreno.patch b/queue-2.6.25/tcp-frto-sack-variant-is-errorneously-used-with-newreno.patch
new file mode 100644 (file)
index 0000000..f8a5208
--- /dev/null
@@ -0,0 +1,105 @@
+From 014a7ae5645af49ada9b3ad9aaef57487d1a29ba Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Date: Thu, 8 May 2008 01:09:11 -0700
+Subject: tcp FRTO: SACK variant is errorneously used with NewReno
+
+From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+
+[ upstream commit: 62ab22278308a40bcb7f4079e9719ab8b7fe11b5 ]
+
+Note: there's actually another bug in FRTO's SACK variant, which
+is causing the failure in the NewReno case because of the error
+that's fixed here. I'll fix the SACK case separately (it's
+a separate bug really, though related, but in order to fix that
+I need to audit tp->snd_nxt usage a bit).
+
+There were two places where SACK variant of FRTO is getting
+incorrectly used even if SACK wasn't negotiated by the TCP flow.
+This leads to incorrect setting of frto_highmark with NewReno
+if a previous recovery was interrupted by another RTO.
+
+An eventual fallback to conventional recovery then incorrectly
+considers one or couple of segments as forward transmissions
+though they weren't, which then are not LOST marked during
+fallback making them "non-retransmittable" until the next RTO.
+In a bad case, those segments are really lost and are the only
+one left in the window. Thus TCP needs another RTO to continue.
+The next FRTO, however, could again repeat the same events
+making the progress of the TCP flow extremely slow.
+
+In order for these events to occur at all, FRTO must occur
+again in FRTOs step 3 while the key segments must be lost as
+well, which is not too likely in practice. It seems to occur most
+frequently with some small devices such as network printers
+that *seem* to accept TCP segments only in-order. In cases
+where key segments weren't lost, things get automatically
+resolved because those wrongly marked segments don't need to be
+retransmitted in order to continue.
+
+I found a reproducer after digging up relevant reports (few
+reports in total, none at netdev or lkml I know of), some
+cases seemed to indicate middlebox issues which seems now
+to be a false assumption some people had made. Bugzilla
+#10063 _might_ be related. Damon L. Chesser <damon@damtek.com>
+had a reproducible case and was kind enough to tcpdump it
+for me. With the tcpdump log it was quite trivial to figure
+out.
+
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv4/tcp_input.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -113,8 +113,6 @@ int sysctl_tcp_abc __read_mostly;
+ #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
+ #define FLAG_ANY_PROGRESS     (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
+-#define IsSackFrto() (sysctl_tcp_frto == 0x2)
+-
+ #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
+ #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
+@@ -1685,6 +1683,11 @@ static inline void tcp_reset_reno_sack(s
+       tp->sacked_out = 0;
+ }
++static int tcp_is_sackfrto(const struct tcp_sock *tp)
++{
++      return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
++}
++
+ /* F-RTO can only be used if TCP has never retransmitted anything other than
+  * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
+  */
+@@ -1701,7 +1704,7 @@ int tcp_use_frto(struct sock *sk)
+       if (icsk->icsk_mtup.probe_size)
+               return 0;
+-      if (IsSackFrto())
++      if (tcp_is_sackfrto(tp))
+               return 1;
+       /* Avoid expensive walking of rexmit queue if possible */
+@@ -1791,7 +1794,7 @@ void tcp_enter_frto(struct sock *sk)
+       /* Earlier loss recovery underway (see RFC4138; Appendix B).
+        * The last condition is necessary at least in tp->frto_counter case.
+        */
+-      if (IsSackFrto() && (tp->frto_counter ||
++      if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
+           ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
+           after(tp->high_seq, tp->snd_una)) {
+               tp->frto_highmark = tp->high_seq;
+@@ -3110,7 +3113,7 @@ static int tcp_process_frto(struct sock 
+               return 1;
+       }
+-      if (!IsSackFrto() || tcp_is_reno(tp)) {
++      if (!tcp_is_sackfrto(tp)) {
+               /* RFC4138 shortcoming in step 2; should also have case c):
+                * ACK isn't duplicate nor advances window, e.g., opposite dir
+                * data, winupdate
diff --git a/queue-2.6.25/tcp-frto-work-around-inorder-receivers.patch b/queue-2.6.25/tcp-frto-work-around-inorder-receivers.patch
new file mode 100644 (file)
index 0000000..8459149
--- /dev/null
@@ -0,0 +1,68 @@
+From 497157281d4368c61f53846646ec89ce4175839c Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Date: Tue, 13 May 2008 02:54:19 -0700
+Subject: tcp FRTO: work-around inorder receivers
+
+From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+
+[ upstream commit: 79d44516b4b178ffb6e2159c75584cfcfc097914 ]
+
+If receiver consumes segments successfully only in-order, FRTO
+fallback to conventional recovery produces RTO loop because
+FRTO's forward transmissions will always get dropped and need to
+be resent, yet by default they're not marked as lost (which are
+the only segments we will retransmit in CA_Loss).
+
+Price to pay about this is occasionally unnecessarily
+retransmitting the forward transmission(s). SACK blocks help
+a bit to avoid this, so it's mainly a concern for NewReno case
+though SACK is not fully immune either.
+
+This change has a side-effect of fixing SACKFRTO problem where
+it didn't have snd_nxt of the RTO time available anymore when
+fallback became necessary (this problem would have only occurred
+when RTO would occur for two or more segments and ECE arrives
+in step 3; no need to figure out how to fix that unless the
+TODO item of selective behavior is considered in future).
+
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Reported-by: Damon L. Chesser <damon@damtek.com>
+Tested-by: Damon L. Chesser <damon@damtek.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv4/tcp_input.c |   15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1841,9 +1841,16 @@ static void tcp_enter_frto_loss(struct s
+                       TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+               }
+-              /* Don't lost mark skbs that were fwd transmitted after RTO */
+-              if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) &&
+-                  !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
++              /* Marking forward transmissions that were made after RTO lost
++               * can cause unnecessary retransmissions in some scenarios,
++               * SACK blocks will mitigate that in some but not in all cases.
++               * We used to not mark them but it was causing break-ups with
++               * receivers that do only in-order receival.
++               *
++               * TODO: we could detect presence of such receiver and select
++               * different behavior per flow.
++               */
++              if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+                       TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+                       tp->lost_out += tcp_skb_pcount(skb);
+               }
+@@ -1859,7 +1866,7 @@ static void tcp_enter_frto_loss(struct s
+       tp->reordering = min_t(unsigned int, tp->reordering,
+                              sysctl_tcp_reordering);
+       tcp_set_ca_state(sk, TCP_CA_Loss);
+-      tp->high_seq = tp->frto_highmark;
++      tp->high_seq = tp->snd_nxt;
+       TCP_ECN_queue_cwr(tp);
+       tcp_clear_retrans_hints_partial(tp);
diff --git a/queue-2.6.25/tcp-limit-cwnd-growth-when-deferring-for-gso.patch b/queue-2.6.25/tcp-limit-cwnd-growth-when-deferring-for-gso.patch
new file mode 100644 (file)
index 0000000..656d9ff
--- /dev/null
@@ -0,0 +1,34 @@
+From fdd040a9f8f7fb8152d8101a34f147daf0b3a003 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: John Heffner <johnwheffner@gmail.com>
+Date: Tue, 29 Apr 2008 03:13:52 -0700
+Subject: tcp: Limit cwnd growth when deferring for GSO
+
+From: John Heffner <johnwheffner@gmail.com>
+
+[ upstream commit: 246eb2af060fc32650f07203c02bdc0456ad76c7 ]
+
+This fixes inappropriately large cwnd growth on sender-limited flows
+when GSO is enabled, limiting cwnd growth to 64k.
+
+[ Backport to 2.6.25 by replacing sk->sk_gso_max_size with 65536 -DaveM ]
+
+Signed-off-by: John Heffner <johnwheffner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv4/tcp_cong.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_cong.c
++++ b/net/ipv4/tcp_cong.c
+@@ -287,7 +287,8 @@ int tcp_is_cwnd_limited(const struct soc
+       left = tp->snd_cwnd - in_flight;
+       if (sk_can_gso(sk) &&
+-          left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd)
++          left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
++          left * tp->mss_cache < 65536)
+               return 1;
+       return left <= tcp_max_burst(tp);
+ }
diff --git a/queue-2.6.25/tcp-tcp-connection-times-out-if-icmp-frag-needed-is-delayed.patch b/queue-2.6.25/tcp-tcp-connection-times-out-if-icmp-frag-needed-is-delayed.patch
new file mode 100644 (file)
index 0000000..ac2d98f
--- /dev/null
@@ -0,0 +1,74 @@
+From a999cd82c502e723f2504782050d6792d0503ac9 Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Sridhar Samudrala <sri@us.ibm.com>
+Date: Wed, 21 May 2008 16:42:20 -0700
+Subject: tcp: TCP connection times out if ICMP frag needed is delayed
+
+From: Sridhar Samudrala <sri@us.ibm.com>
+
+[ upstream commit: 7d227cd235c809c36c847d6a597956ad9e9d2bae ]
+
+We are seeing an issue with TCP in handling an ICMP frag needed
+message that is received after net.ipv4.tcp_retries1 retransmits.
+The default value of retries1 is 3. So if the path mtu changes
+and ICMP frag needed is lost for the first 3 retransmits or if
+it gets delayed until 3 retransmits are done, TCP doesn't update
+MSS correctly and continues to retransmit the original message
+until it times out after tcp_retries2 retransmits.
+
+I am seeing this issue even with the latest 2.6.25.4 kernel.
+
+In tcp_retransmit_timer(), when retransmits counter exceeds
+tcp_retries1 value, the dst cache entry of the socket is reset.
+At this time, if we receive an ICMP frag needed message, the
+dst entry gets updated with the new MTU, but the TCP sockets
+dst_cache entry remains NULL.
+
+So the next time when we try to retransmit after the ICMP frag
+needed is received, tcp_retransmit_skb() gets called. Here the
+cur_mss value is calculated at the start of the routine with
+a NULL sk_dst_cache. Instead we should call tcp_current_mss after
+the rebuild_header that caches the dst entry with the updated mtu.
+Also the rebuild_header should be called before tcp_fragment
+so that skb is fragmented if the mss goes down.
+
+Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv4/tcp_output.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1836,7 +1836,7 @@ int tcp_retransmit_skb(struct sock *sk, 
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
+-      unsigned int cur_mss = tcp_current_mss(sk, 0);
++      unsigned int cur_mss;
+       int err;
+       /* Inconslusive MTU probe */
+@@ -1858,6 +1858,11 @@ int tcp_retransmit_skb(struct sock *sk, 
+                       return -ENOMEM;
+       }
++      if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
++              return -EHOSTUNREACH; /* Routing failure or similar. */
++
++      cur_mss = tcp_current_mss(sk, 0);
++
+       /* If receiver has shrunk his window, and skb is out of
+        * new window, do not retransmit it. The exception is the
+        * case, when window is shrunk to zero. In this case
+@@ -1884,9 +1889,6 @@ int tcp_retransmit_skb(struct sock *sk, 
+           (sysctl_tcp_retrans_collapse != 0))
+               tcp_retrans_try_collapse(sk, skb, cur_mss);
+-      if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+-              return -EHOSTUNREACH; /* Routing failure or similar. */
+-
+       /* Some Solaris stacks overoptimize and ignore the FIN on a
+        * retransmit when old data is attached.  So strip it off
+        * since it is cheap to do so and saves bytes on the network.
diff --git a/queue-2.6.25/vlan-correctly-handle-device-notifications-for-layered-vlan-devices.patch b/queue-2.6.25/vlan-correctly-handle-device-notifications-for-layered-vlan-devices.patch
new file mode 100644 (file)
index 0000000..3854fc3
--- /dev/null
@@ -0,0 +1,38 @@
+From 50e7aa60b098edaceb29c24fa4194427a29719ea Mon Sep 17 00:00:00 2001
+Message-Id: <20080609.134337.193698173.davem@davemloft.net>
+From: Patrick McHardy <kaber@trash.net>
+Date: Mon, 9 Jun 2008 11:42:44 -0700
+Subject: vlan: Correctly handle device notifications for layered VLAN devices
+
+From: Patrick McHardy <kaber@trash.net>
+
+[ upstream commit: 81d85346b3fcd8b3167eac8b5fb415a210bd4345 ]
+
+Commit 30688a9 ([VLAN]: Handle vlan devices net namespace changing)
+changed the device notifier to special-case notifications for VLAN
+devices, effectively disabling state propagation to underlying VLAN
+devices. This is needed for layered VLANs though, so restore the
+original behaviour.
+
+Signed-off-by: Patrick McHardy <kaber@trash.net>
+Acked-by: Pavel Emelyanov <xemul@openvz.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/8021q/vlan.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/net/8021q/vlan.c
++++ b/net/8021q/vlan.c
+@@ -397,10 +397,8 @@ static int vlan_device_event(struct noti
+       if (dev->nd_net != &init_net)
+               return NOTIFY_DONE;
+-      if (is_vlan_dev(dev)) {
++      if (is_vlan_dev(dev))
+               __vlan_device_event(dev, event);
+-              goto out;
+-      }
+       grp = __vlan_find_group(dev->ifindex);
+       if (!grp)