3.0-stable patches

author Greg Kroah-Hartman <gregkh@suse.de>

Tue, 3 Jan 2012 20:50:48 +0000 (12:50 -0800)

committer Greg Kroah-Hartman <gregkh@suse.de>

Tue, 3 Jan 2012 20:50:48 +0000 (12:50 -0800)
author Greg Kroah-Hartman <gregkh@suse.de>
Tue, 3 Jan 2012 20:50:48 +0000 (12:50 -0800)
committer Greg Kroah-Hartman <gregkh@suse.de>
Tue, 3 Jan 2012 20:50:48 +0000 (12:50 -0800)
diff --git a/queue-3.0/ipv4-flush-route-cache-after-change-accept_local.patch b/queue-3.0/ipv4-flush-route-cache-after-change-accept_local.patch

new file mode 100644 (file)

index 0000000..5fcbde0
--- /dev/null
+++ b/queue-3.0/ipv4-flush-route-cache-after-change-accept_local.patch
@@ -0,0 +1,43 @@
+From f9e1e7e59d60d85684a05366d282b6d6bdfdb687 Mon Sep 17 00:00:00 2001
+From: "Peter Pan(潘卫平)" <panweiping3@gmail.com>
+Date: Thu, 1 Dec 2011 15:47:06 +0000
+Subject: ipv4: flush route cache after change accept_local
+
+
+From: Weiping Pan <panweiping3@gmail.com>
+
+[ Upstream commit d01ff0a049f749e0bf10a35bb23edd012718c8c2 ]
+
+After resetting ipv4_devconf->data[IPV4_DEVCONF_ACCEPT_LOCAL] to 0,
+we should flush the route cache, or it will continue to receive packets with
+a local source address, which should be dropped.
+
+Signed-off-by: Weiping Pan <panweiping3@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/ipv4/devinet.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -1490,7 +1490,9 @@ static int devinet_conf_proc(ctl_table *
+                            void __user *buffer,
+                            size_t *lenp, loff_t *ppos)
+ {
++      int old_value = *(int *)ctl->data;
+       int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
++      int new_value = *(int *)ctl->data;
+ 
+       if (write) {
+               struct ipv4_devconf *cnf = ctl->extra1;
+@@ -1501,6 +1503,9 @@ static int devinet_conf_proc(ctl_table *
+ 
+               if (cnf == net->ipv4.devconf_dflt)
+                       devinet_copy_dflt_conf(net, i);
++              if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
++                      if ((new_value == 0) && (old_value != 0))
++                              rt_cache_flush(net, 0);
+       }
+ 
+       return ret;
diff --git a/queue-3.0/ipv4-reintroduce-route-cache-garbage-collector.patch b/queue-3.0/ipv4-reintroduce-route-cache-garbage-collector.patch

new file mode 100644 (file)

index 0000000..7d8f852
--- /dev/null
+++ b/queue-3.0/ipv4-reintroduce-route-cache-garbage-collector.patch
@@ -0,0 +1,165 @@
+From 92023c1e1af8d82e1dce92a17efd0f1adffa2dd2 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Wed, 21 Dec 2011 15:47:16 -0500
+Subject: ipv4: reintroduce route cache garbage collector
+
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+[ Upstream commit 9f28a2fc0bd77511f649c0a788c7bf9a5fd04edb ]
+
+Commit 2c8cec5c10b (ipv4: Cache learned PMTU information in inetpeer)
+removed IP route cache garbage collector a bit too soon, as this gc was
+responsible for expired routes cleanup, releasing their neighbour
+reference.
+
+As pointed out by Robert Gladewitz, recent kernels can fill and exhaust
+their neighbour cache.
+
+Reintroduce the garbage collection, since we'll have to wait until our
+neighbour lookups become refcount-less before we can stop depending on it.
+
+Reported-by: Robert Gladewitz <gladewitz@gmx.de>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/ipv4/route.c |  106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 106 insertions(+)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -132,6 +132,9 @@ static int ip_rt_min_pmtu __read_mostly
+ static int ip_rt_min_advmss __read_mostly     = 256;
+ static int rt_chain_length_max __read_mostly  = 20;
+ 
++static struct delayed_work expires_work;
++static unsigned long expires_ljiffies;
++
+ /*
+  *    Interface to generic destination cache.
+  */
+@@ -821,6 +824,97 @@ static int has_noalias(const struct rtab
+       return ONE;
+ }
+ 
++static void rt_check_expire(void)
++{
++      static unsigned int rover;
++      unsigned int i = rover, goal;
++      struct rtable *rth;
++      struct rtable __rcu **rthp;
++      unsigned long samples = 0;
++      unsigned long sum = 0, sum2 = 0;
++      unsigned long delta;
++      u64 mult;
++
++      delta = jiffies - expires_ljiffies;
++      expires_ljiffies = jiffies;
++      mult = ((u64)delta) << rt_hash_log;
++      if (ip_rt_gc_timeout > 1)
++              do_div(mult, ip_rt_gc_timeout);
++      goal = (unsigned int)mult;
++      if (goal > rt_hash_mask)
++              goal = rt_hash_mask + 1;
++      for (; goal > 0; goal--) {
++              unsigned long tmo = ip_rt_gc_timeout;
++              unsigned long length;
++
++              i = (i + 1) & rt_hash_mask;
++              rthp = &rt_hash_table[i].chain;
++
++              if (need_resched())
++                      cond_resched();
++
++              samples++;
++
++              if (rcu_dereference_raw(*rthp) == NULL)
++                      continue;
++              length = 0;
++              spin_lock_bh(rt_hash_lock_addr(i));
++              while ((rth = rcu_dereference_protected(*rthp,
++                                      lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
++                      prefetch(rth->dst.rt_next);
++                      if (rt_is_expired(rth)) {
++                              *rthp = rth->dst.rt_next;
++                              rt_free(rth);
++                              continue;
++                      }
++                      if (rth->dst.expires) {
++                              /* Entry is expired even if it is in use */
++                              if (time_before_eq(jiffies, rth->dst.expires)) {
++nofree:
++                                      tmo >>= 1;
++                                      rthp = &rth->dst.rt_next;
++                                      /*
++                                       * We only count entries on
++                                       * a chain with equal hash inputs once
++                                       * so that entries for different QOS
++                                       * levels, and other non-hash input
++                                       * attributes don't unfairly skew
++                                       * the length computation
++                                       */
++                                      length += has_noalias(rt_hash_table[i].chain, rth);
++                                      continue;
++                              }
++                      } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
++                              goto nofree;
++
++                      /* Cleanup aged off entries. */
++                      *rthp = rth->dst.rt_next;
++                      rt_free(rth);
++              }
++              spin_unlock_bh(rt_hash_lock_addr(i));
++              sum += length;
++              sum2 += length*length;
++      }
++      if (samples) {
++              unsigned long avg = sum / samples;
++              unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
++              rt_chain_length_max = max_t(unsigned long,
++                                      ip_rt_gc_elasticity,
++                                      (avg + 4*sd) >> FRACT_BITS);
++      }
++      rover = i;
++}
++
++/*
++ * rt_worker_func() is run in process context.
++ * we call rt_check_expire() to scan part of the hash table
++ */
++static void rt_worker_func(struct work_struct *work)
++{
++      rt_check_expire();
++      schedule_delayed_work(&expires_work, ip_rt_gc_interval);
++}
++
+ /*
+  * Perturbation of rt_genid by a small quantity [1..256]
+  * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
+@@ -3088,6 +3182,13 @@ static ctl_table ipv4_route_table[] = {
+               .proc_handler   = proc_dointvec_jiffies,
+       },
+       {
++              .procname       = "gc_interval",
++              .data           = &ip_rt_gc_interval,
++              .maxlen         = sizeof(int),
++              .mode           = 0644,
++              .proc_handler   = proc_dointvec_jiffies,
++      },
++      {
+               .procname       = "redirect_load",
+               .data           = &ip_rt_redirect_load,
+               .maxlen         = sizeof(int),
+@@ -3297,6 +3398,11 @@ int __init ip_rt_init(void)
+       devinet_init();
+       ip_fib_init();
+ 
++      INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
++      expires_ljiffies = jiffies;
++      schedule_delayed_work(&expires_work,
++              net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
++
+       if (ip_rt_proc_init())
+               printk(KERN_ERR "Unable to create route proc files\n");
+ #ifdef CONFIG_XFRM
diff --git a/queue-3.0/ipv4-using-prefetch-requires-including-prefetch.h.patch b/queue-3.0/ipv4-using-prefetch-requires-including-prefetch.h.patch

new file mode 100644 (file)

index 0000000..1a7124b
--- /dev/null
+++ b/queue-3.0/ipv4-using-prefetch-requires-including-prefetch.h.patch
@@ -0,0 +1,29 @@
+From 31a4e30815354c5e49eb20752409ef7c721fc6b0 Mon Sep 17 00:00:00 2001
+From: Stephen Rothwell <sfr@canb.auug.org.au>
+Date: Thu, 22 Dec 2011 17:03:29 +1100
+Subject: ipv4: using prefetch requires including prefetch.h
+
+
+From: Stephen Rothwell <sfr@canb.auug.org.au>
+
+[ Upstream commit b9eda06f80b0db61a73bd87c6b0eb67d8aca55ad ]
+
+Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
+Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
+Acked-by: David Miller <davem@davemloft.net>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/ipv4/route.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -91,6 +91,7 @@
+ #include <linux/rcupdate.h>
+ #include <linux/times.h>
+ #include <linux/slab.h>
++#include <linux/prefetch.h>
+ #include <net/dst.h>
+ #include <net/net_namespace.h>
+ #include <net/protocol.h>
diff --git a/queue-3.0/llc-llc_cmsg_rcv-was-getting-called-after-sk_eat_skb.patch b/queue-3.0/llc-llc_cmsg_rcv-was-getting-called-after-sk_eat_skb.patch

new file mode 100644 (file)

index 0000000..16f3288
--- /dev/null
+++ b/queue-3.0/llc-llc_cmsg_rcv-was-getting-called-after-sk_eat_skb.patch
@@ -0,0 +1,57 @@
+From 76bcc2af1348ccf5a40421e1181f2547718a1e51 Mon Sep 17 00:00:00 2001
+From: Alex Juncu <ajuncu@ixiacom.com>
+Date: Thu, 15 Dec 2011 23:01:25 +0000
+Subject: llc: llc_cmsg_rcv was getting called after sk_eat_skb.
+
+
+From: Alex Juncu <ajuncu@ixiacom.com>
+
+[ Upstream commit 9cef310fcdee12b49b8b4c96fd8f611c8873d284 ]
+
+Received non stream protocol packets were calling llc_cmsg_rcv that used a
+skb after that skb was released by sk_eat_skb. This caused received STP
+packets to generate kernel panics.
+
+Signed-off-by: Alexandru Juncu <ajuncu@ixiacom.com>
+Signed-off-by: Kunjan Naik <knaik@ixiacom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/llc/af_llc.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/net/llc/af_llc.c
++++ b/net/llc/af_llc.c
+@@ -833,15 +833,15 @@ static int llc_ui_recvmsg(struct kiocb *
+               copied += used;
+               len -= used;
+ 
++              /* For non stream protcols we get one packet per recvmsg call */
++              if (sk->sk_type != SOCK_STREAM)
++                      goto copy_uaddr;
++
+               if (!(flags & MSG_PEEK)) {
+                       sk_eat_skb(sk, skb, 0);
+                       *seq = 0;
+               }
+ 
+-              /* For non stream protcols we get one packet per recvmsg call */
+-              if (sk->sk_type != SOCK_STREAM)
+-                      goto copy_uaddr;
+-
+               /* Partial read */
+               if (used + offset < skb->len)
+                       continue;
+@@ -857,6 +857,12 @@ copy_uaddr:
+       }
+       if (llc_sk(sk)->cmsg_flags)
+               llc_cmsg_rcv(msg, skb);
++
++      if (!(flags & MSG_PEEK)) {
++                      sk_eat_skb(sk, skb, 0);
++                      *seq = 0;
++      }
++
+       goto out;
+ }
+ 
diff --git a/queue-3.0/mqprio-avoid-panic-if-no-options-are-provided.patch b/queue-3.0/mqprio-avoid-panic-if-no-options-are-provided.patch

new file mode 100644 (file)

index 0000000..655666e
--- /dev/null
+++ b/queue-3.0/mqprio-avoid-panic-if-no-options-are-provided.patch
@@ -0,0 +1,32 @@
+From 5e59a51890a259701718b9328560934407176b46 Mon Sep 17 00:00:00 2001
+From: Thomas Graf <tgraf@redhat.com>
+Date: Thu, 22 Dec 2011 02:05:07 +0000
+Subject: mqprio: Avoid panic if no options are provided
+
+
+From: Thomas Graf <tgraf@redhat.com>
+
+[ Upstream commit 7838f2ce36b6ab5c13ef20b1857e3bbd567f1759 ]
+
+Userspace may not provide TCA_OPTIONS; in fact, tc currently does
+not do so if no arguments are specified on the command line.
+Return EINVAL instead of panicking.
+
+Signed-off-by: Thomas Graf <tgraf@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/sched/sch_mqprio.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -106,7 +106,7 @@ static int mqprio_init(struct Qdisc *sch
+       if (!netif_is_multiqueue(dev))
+               return -EOPNOTSUPP;
+ 
+-      if (nla_len(opt) < sizeof(*qopt))
++      if (!opt || nla_len(opt) < sizeof(*qopt))
+               return -EINVAL;
+ 
+       qopt = nla_data(opt);
diff --git a/queue-3.0/net-bpf_jit-fix-an-off-one-bug-in-x86_64-cond-jump-target.patch b/queue-3.0/net-bpf_jit-fix-an-off-one-bug-in-x86_64-cond-jump-target.patch

new file mode 100644 (file)

index 0000000..9b3d69a
--- /dev/null
+++ b/queue-3.0/net-bpf_jit-fix-an-off-one-bug-in-x86_64-cond-jump-target.patch
@@ -0,0 +1,37 @@
+From c9443fa522619b70cc9ea12d7f58e4cf633500c3 Mon Sep 17 00:00:00 2001
+From: Markus Kötter <nepenthesdev@gmail.com>
+Date: Sat, 17 Dec 2011 11:39:08 +0000
+Subject: net: bpf_jit: fix an off-one bug in x86_64 cond jump target
+
+
+From: Markus Kötter <nepenthesdev@gmail.com>
+
+[ Upstream commit a03ffcf873fe0f2565386ca8ef832144c42e67fa ]
+
+x86 jump instruction size is 2 or 5 bytes (near/long jump), not 2 or 6
+bytes.
+
+In case a conditional jump is followed by a long jump, conditional jump
+target is one byte past the start of target instruction.
+
+Signed-off-by: Markus Kötter <nepenthesdev@gmail.com>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/net/bpf_jit_comp.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -568,8 +568,8 @@ cond_branch:                       f_offset = addrs[i + filt
+                                       break;
+                               }
+                               if (filter[i].jt != 0) {
+-                                      if (filter[i].jf)
+-                                              t_offset += is_near(f_offset) ? 2 : 6;
++                                      if (filter[i].jf && f_offset)
++                                              t_offset += is_near(f_offset) ? 2 : 5;
+                                       EMIT_COND_JMP(t_op, t_offset);
+                                       if (filter[i].jf)
+                                               EMIT_JMP(f_offset);
diff --git a/queue-3.0/net-have-ipconfig-not-wait-if-no-dev-is-available.patch b/queue-3.0/net-have-ipconfig-not-wait-if-no-dev-is-available.patch

new file mode 100644 (file)

index 0000000..f1adb9c
--- /dev/null
+++ b/queue-3.0/net-have-ipconfig-not-wait-if-no-dev-is-available.patch
@@ -0,0 +1,40 @@
+From 2ec3264c7298c2326f33c482e787ec449638ed85 Mon Sep 17 00:00:00 2001
+From: Gerlando Falauto <gerlando.falauto@keymile.com>
+Date: Mon, 19 Dec 2011 22:58:04 +0000
+Subject: net: have ipconfig not wait if no dev is available
+
+
+From: Gerlando Falauto <gerlando.falauto@keymile.com>
+
+[ Upstream commit cd7816d14953c8af910af5bb92f488b0b277e29d ]
+
+previous commit 3fb72f1e6e6165c5f495e8dc11c5bbd14c73385c
+makes IP-Config wait for carrier on at least one network device.
+
+Before waiting (predefined value 120s), check that at least one device
+was successfully brought up. Otherwise (e.g. buggy bootloader
+which does not set the MAC address) there is no point in waiting
+for carrier.
+
+Cc: Micha Nelissen <micha@neli.hopto.org>
+Cc: Holger Brunck <holger.brunck@keymile.com>
+Signed-off-by: Gerlando Falauto <gerlando.falauto@keymile.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/ipv4/ipconfig.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv4/ipconfig.c
++++ b/net/ipv4/ipconfig.c
+@@ -252,6 +252,10 @@ static int __init ic_open_devs(void)
+               }
+       }
+ 
++      /* no point in waiting if we could not bring up at least one device */
++      if (!ic_first_dev)
++              goto have_carrier;
++
+       /* wait for a carrier on at least one device */
+       start = jiffies;
+       while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
diff --git a/queue-3.0/ppp-fix-pptp-double-release_sock-in-pptp_bind.patch b/queue-3.0/ppp-fix-pptp-double-release_sock-in-pptp_bind.patch

new file mode 100644 (file)

index 0000000..3910d66
--- /dev/null
+++ b/queue-3.0/ppp-fix-pptp-double-release_sock-in-pptp_bind.patch
@@ -0,0 +1,32 @@
+From 884aad5cf67fed3cc15003dbb9c8cdf9b6833b57 Mon Sep 17 00:00:00 2001
+From: Djalal Harouni <tixxdz@opendz.org>
+Date: Tue, 6 Dec 2011 15:47:12 +0000
+Subject: ppp: fix pptp double release_sock in pptp_bind()
+
+
+From: Djalal Harouni <tixxdz@opendz.org>
+
+[ Upstream commit a454daceb78844a09c08b6e2d8badcb76a5d73b9 ]
+
+Signed-off-by: Djalal Harouni <tixxdz@opendz.org>
+Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ drivers/net/pptp.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/net/pptp.c
++++ b/drivers/net/pptp.c
+@@ -418,10 +418,8 @@ static int pptp_bind(struct socket *sock
+       lock_sock(sk);
+ 
+       opt->src_addr = sp->sa_addr.pptp;
+-      if (add_chan(po)) {
+-              release_sock(sk);
++      if (add_chan(po))
+               error = -EBUSY;
+-      }
+ 
+       release_sock(sk);
+       return error;
diff --git a/queue-3.0/sch_gred-should-not-use-gfp_kernel-while-holding-a-spinlock.patch b/queue-3.0/sch_gred-should-not-use-gfp_kernel-while-holding-a-spinlock.patch

new file mode 100644 (file)

index 0000000..e65ef49
--- /dev/null
+++ b/queue-3.0/sch_gred-should-not-use-gfp_kernel-while-holding-a-spinlock.patch
@@ -0,0 +1,36 @@
+From 8c7968a17315bcfa4b907334a35675129d84025f Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Sun, 11 Dec 2011 23:42:53 +0000
+Subject: sch_gred: should not use GFP_KERNEL while holding a spinlock
+
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+[ Upstream commit 3f1e6d3fd37bd4f25e5b19f1c7ca21850426c33f ]
+
+gred_change_vq() is called under sch_tree_lock(sch).
+
+This means a spinlock is held, and we are not allowed to sleep in this
+context.
+
+We might pre-allocate memory using GFP_KERNEL before taking spinlock,
+but this is not suitable for stable material.
+
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/sched/sch_gred.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/sch_gred.c
++++ b/net/sched/sch_gred.c
+@@ -385,7 +385,7 @@ static inline int gred_change_vq(struct
+       struct gred_sched_data *q;
+ 
+       if (table->tab[dp] == NULL) {
+-              table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL);
++              table->tab[dp] = kzalloc(sizeof(*q), GFP_ATOMIC);
+               if (table->tab[dp] == NULL)
+                       return -ENOMEM;
+       }
diff --git a/queue-3.0/sctp-do-not-account-for-sizeof-struct-sk_buff-in-estimated-rwnd.patch b/queue-3.0/sctp-do-not-account-for-sizeof-struct-sk_buff-in-estimated-rwnd.patch

new file mode 100644 (file)

index 0000000..665021d
--- /dev/null
+++ b/queue-3.0/sctp-do-not-account-for-sizeof-struct-sk_buff-in-estimated-rwnd.patch
@@ -0,0 +1,97 @@
+From 094b242b70f67ac3996934432f0a0d564e791cf6 Mon Sep 17 00:00:00 2001
+From: Thomas Graf <tgraf@redhat.com>
+Date: Mon, 19 Dec 2011 04:11:40 +0000
+Subject: sctp: Do not account for sizeof(struct sk_buff) in estimated rwnd
+
+
+From: Thomas Graf <tgraf@redhat.com>
+
+[ Upstream commit a76c0adf60f6ca5ff3481992e4ea0383776b24d2 ]
+
+When checking whether a DATA chunk fits into the estimated rwnd a
+full sizeof(struct sk_buff) is added to the needed chunk size. This
+quickly exhausts the available rwnd space and leads to packets being
+sent which are much below the PMTU limit. This can lead to much worse
+performance.
+
+The reason for this behaviour was to avoid putting too much memory
+pressure on the receiver. The concept is not completely irrational
+because a Linux receiver does in fact clone an skb for each DATA chunk
+delivered. However, Linux also reserves half the available socket
+buffer space for data structures therefore usage of it is already
+accounted for.
+
+When proposing to change this the last time it was noted that this
+behaviour was introduced to solve a performance issue caused by rwnd
+overusage in combination with small DATA chunks.
+
+Trying to reproduce this I found that with the sk_buff overhead removed,
+the performance would improve significantly unless socket buffer limits
+are increased.
+
+The following numbers have been gathered using a patched iperf
+supporting SCTP over a live 1 Gbit ethernet network. The -l option
+was used to limit DATA chunk sizes. The numbers listed are based on
+the average of 3 test runs each. Default values have been used for
+sk_(r|w)mem.
+
+Chunk
+Size    Unpatched     No Overhead
+-------------------------------------
+   4    15.2 Kbit [!]   12.2 Mbit [!]
+   8    35.8 Kbit [!]   26.0 Mbit [!]
+  16    95.5 Kbit [!]   54.4 Mbit [!]
+  32   106.7 Mbit      102.3 Mbit
+  64   189.2 Mbit      188.3 Mbit
+ 128   331.2 Mbit      334.8 Mbit
+ 256   537.7 Mbit      536.0 Mbit
+ 512   766.9 Mbit      766.6 Mbit
+1024   810.1 Mbit      808.6 Mbit
+
+Signed-off-by: Thomas Graf <tgraf@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/sctp/output.c   |    8 +-------
+ net/sctp/outqueue.c |    6 ++----
+ 2 files changed, 3 insertions(+), 11 deletions(-)
+
+--- a/net/sctp/output.c
++++ b/net/sctp/output.c
+@@ -697,13 +697,7 @@ static void sctp_packet_append_data(stru
+       /* Keep track of how many bytes are in flight to the receiver. */
+       asoc->outqueue.outstanding_bytes += datasize;
+ 
+-      /* Update our view of the receiver's rwnd. Include sk_buff overhead
+-       * while updating peer.rwnd so that it reduces the chances of a
+-       * receiver running out of receive buffer space even when receive
+-       * window is still open. This can happen when a sender is sending
+-       * sending small messages.
+-       */
+-      datasize += sizeof(struct sk_buff);
++      /* Update our view of the receiver's rwnd. */
+       if (datasize < rwnd)
+               rwnd -= datasize;
+       else
+--- a/net/sctp/outqueue.c
++++ b/net/sctp/outqueue.c
+@@ -411,8 +411,7 @@ void sctp_retransmit_mark(struct sctp_ou
+                                       chunk->transport->flight_size -=
+                                                       sctp_data_size(chunk);
+                               q->outstanding_bytes -= sctp_data_size(chunk);
+-                              q->asoc->peer.rwnd += (sctp_data_size(chunk) +
+-                                                      sizeof(struct sk_buff));
++                              q->asoc->peer.rwnd += sctp_data_size(chunk);
+                       }
+                       continue;
+               }
+@@ -432,8 +431,7 @@ void sctp_retransmit_mark(struct sctp_ou
+                        * (Section 7.2.4)), add the data size of those
+                        * chunks to the rwnd.
+                        */
+-                      q->asoc->peer.rwnd += (sctp_data_size(chunk) +
+-                                              sizeof(struct sk_buff));
++                      q->asoc->peer.rwnd += sctp_data_size(chunk);
+                       q->outstanding_bytes -= sctp_data_size(chunk);
+                       if (chunk->transport)
+                               transport->flight_size -= sctp_data_size(chunk);
diff --git a/queue-3.0/sctp-fix-incorrect-overflow-check-on-autoclose.patch b/queue-3.0/sctp-fix-incorrect-overflow-check-on-autoclose.patch

new file mode 100644 (file)

index 0000000..dc982f8
--- /dev/null
+++ b/queue-3.0/sctp-fix-incorrect-overflow-check-on-autoclose.patch
@@ -0,0 +1,121 @@
+From 4dff6c35f9ff1cf62dd4435517adae40a3dd2d8a Mon Sep 17 00:00:00 2001
+From: Xi Wang <xi.wang@gmail.com>
+Date: Fri, 16 Dec 2011 12:44:15 +0000
+Subject: sctp: fix incorrect overflow check on autoclose
+
+
+From: Xi Wang <xi.wang@gmail.com>
+
+[ Upstream commit 2692ba61a82203404abd7dd2a027bda962861f74 ]
+
+Commit 8ffd3208 voids the previous patches f6778aab and 810c0719 for
+limiting the autoclose value.  If userspace passes in -1 on 32-bit
+platform, the overflow check didn't work and autoclose would be set
+to 0xffffffff.
+
+This patch defines a max_autoclose (in seconds) for limiting the value
+and exposes it through sysctl, with the following intentions.
+
+1) Avoid overflowing autoclose * HZ.
+
+2) Keep the default autoclose bound consistent across 32- and 64-bit
+   platforms (INT_MAX / HZ in this patch).
+
+3) Keep the autoclose value consistent between setsockopt() and
+   getsockopt() calls.
+
+Suggested-by: Vlad Yasevich <vladislav.yasevich@hp.com>
+Signed-off-by: Xi Wang <xi.wang@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/net/sctp/structs.h |    4 ++++
+ net/sctp/associola.c       |    2 +-
+ net/sctp/protocol.c        |    3 +++
+ net/sctp/socket.c          |    2 --
+ net/sctp/sysctl.c          |   13 +++++++++++++
+ 5 files changed, 21 insertions(+), 3 deletions(-)
+
+--- a/include/net/sctp/structs.h
++++ b/include/net/sctp/structs.h
+@@ -236,6 +236,9 @@ extern struct sctp_globals {
+        * bits is an indicator of when to send and window update SACK.
+        */
+       int rwnd_update_shift;
++
++      /* Threshold for autoclose timeout, in seconds. */
++      unsigned long max_autoclose;
+ } sctp_globals;
+ 
+ #define sctp_rto_initial              (sctp_globals.rto_initial)
+@@ -271,6 +274,7 @@ extern struct sctp_globals {
+ #define sctp_auth_enable              (sctp_globals.auth_enable)
+ #define sctp_checksum_disable         (sctp_globals.checksum_disable)
+ #define sctp_rwnd_upd_shift           (sctp_globals.rwnd_update_shift)
++#define sctp_max_autoclose            (sctp_globals.max_autoclose)
+ 
+ /* SCTP Socket type: UDP or TCP style. */
+ typedef enum {
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -173,7 +173,7 @@ static struct sctp_association *sctp_ass
+       asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0;
+       asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay;
+       asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
+-              (unsigned long)sp->autoclose * HZ;
++              min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ;
+ 
+       /* Initializes the timers */
+       for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -1144,6 +1144,9 @@ SCTP_STATIC __init int sctp_init(void)
+       sctp_max_instreams              = SCTP_DEFAULT_INSTREAMS;
+       sctp_max_outstreams             = SCTP_DEFAULT_OUTSTREAMS;
+ 
++      /* Initialize maximum autoclose timeout. */
++      sctp_max_autoclose              = INT_MAX / HZ;
++
+       /* Initialize handle used for association ids. */
+       idr_init(&sctp_assocs_id);
+ 
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -2129,8 +2129,6 @@ static int sctp_setsockopt_autoclose(str
+               return -EINVAL;
+       if (copy_from_user(&sp->autoclose, optval, optlen))
+               return -EFAULT;
+-      /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */
+-      sp->autoclose = min_t(long, sp->autoclose, MAX_SCHEDULE_TIMEOUT / HZ);
+ 
+       return 0;
+ }
+--- a/net/sctp/sysctl.c
++++ b/net/sctp/sysctl.c
+@@ -53,6 +53,10 @@ static int sack_timer_min = 1;
+ static int sack_timer_max = 500;
+ static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
+ static int rwnd_scale_max = 16;
++static unsigned long max_autoclose_min = 0;
++static unsigned long max_autoclose_max =
++      (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX)
++      ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ;
+ 
+ extern long sysctl_sctp_mem[3];
+ extern int sysctl_sctp_rmem[3];
+@@ -251,6 +255,15 @@ static ctl_table sctp_table[] = {
+               .extra1         = &one,
+               .extra2         = &rwnd_scale_max,
+       },
++      {
++              .procname       = "max_autoclose",
++              .data           = &sctp_max_autoclose,
++              .maxlen         = sizeof(unsigned long),
++              .mode           = 0644,
++              .proc_handler   = &proc_doulongvec_minmax,
++              .extra1         = &max_autoclose_min,
++              .extra2         = &max_autoclose_max,
++      },
+ 
+       { /* sentinel */ }
+ };
diff --git a/queue-3.0/series b/queue-3.0/series

index 08a268a0ecedc78c01423b239761a1f7f7bc7951..15874a21c1bd4a001c7b3890b0e464b178d5d54b 100644 (file)
--- a/queue-3.0/series
+++ b/queue-3.0/series
@@ -48,3 +48,14 @@ sparc32-remove-uses-of-g7-in-memcpy-implementation.patch
  sparc32-correct-the-return-value-of-memcpy.patch
  sparc64-fix-masking-and-shifting-in-vis-fpcmp-emulation.patch
  sparc-fix-handling-of-orig_i0-wrt.-debugging-when-restarting-syscalls.patch
+net-bpf_jit-fix-an-off-one-bug-in-x86_64-cond-jump-target.patch
+ppp-fix-pptp-double-release_sock-in-pptp_bind.patch
+llc-llc_cmsg_rcv-was-getting-called-after-sk_eat_skb.patch
+mqprio-avoid-panic-if-no-options-are-provided.patch
+net-have-ipconfig-not-wait-if-no-dev-is-available.patch
+sch_gred-should-not-use-gfp_kernel-while-holding-a-spinlock.patch
+sctp-fix-incorrect-overflow-check-on-autoclose.patch
+sctp-do-not-account-for-sizeof-struct-sk_buff-in-estimated-rwnd.patch
+ipv4-flush-route-cache-after-change-accept_local.patch
+ipv4-reintroduce-route-cache-garbage-collector.patch
+ipv4-using-prefetch-requires-including-prefetch.h.patch
author	Greg Kroah-Hartman <gregkh@suse.de>
	Tue, 3 Jan 2012 20:50:48 +0000 (12:50 -0800)
committer	Greg Kroah-Hartman <gregkh@suse.de>
	Tue, 3 Jan 2012 20:50:48 +0000 (12:50 -0800)
queue-3.0/ipv4-flush-route-cache-after-change-accept_local.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/ipv4-reintroduce-route-cache-garbage-collector.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/ipv4-using-prefetch-requires-including-prefetch.h.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/llc-llc_cmsg_rcv-was-getting-called-after-sk_eat_skb.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/mqprio-avoid-panic-if-no-options-are-provided.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/net-bpf_jit-fix-an-off-one-bug-in-x86_64-cond-jump-target.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/net-have-ipconfig-not-wait-if-no-dev-is-available.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/ppp-fix-pptp-double-release_sock-in-pptp_bind.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/sch_gred-should-not-use-gfp_kernel-while-holding-a-spinlock.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/sctp-do-not-account-for-sizeof-struct-sk_buff-in-estimated-rwnd.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/sctp-fix-incorrect-overflow-check-on-autoclose.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/series		patch \| blob \| blame \| history