From: Greg Kroah-Hartman Date: Wed, 22 Sep 2010 19:58:37 +0000 (-0700) Subject: .35 patches X-Git-Tag: v2.6.35.6~17 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f3503b3690da48a1fc081ee31aa14e2d22b08b83;p=thirdparty%2Fkernel%2Fstable-queue.git .35 patches --- diff --git a/queue-2.6.35/bonding-correctly-process-non-linear-skbs.patch b/queue-2.6.35/bonding-correctly-process-non-linear-skbs.patch new file mode 100644 index 00000000000..0051fcd87a0 --- /dev/null +++ b/queue-2.6.35/bonding-correctly-process-non-linear-skbs.patch @@ -0,0 +1,59 @@ +From ab12811c89e88f2e66746790b1fe4469ccb7bdd9 Mon Sep 17 00:00:00 2001 +From: Andy Gospodarek +Date: Fri, 10 Sep 2010 11:43:20 +0000 +Subject: bonding: correctly process non-linear skbs + +From: Andy Gospodarek + +commit ab12811c89e88f2e66746790b1fe4469ccb7bdd9 upstream. + +It was recently brought to my attention that 802.3ad mode bonds would no +longer form when using some network hardware after a driver update. +After snooping around I realized that the particular hardware was using +page-based skbs and found that skb->data did not contain a valid LACPDU +as it was not stored there. That explained the inability to form an +802.3ad-based bond. For balance-alb mode bonds this was also an issue +as ARPs would not be properly processed. + +This patch fixes the issue in my tests and should be applied to 2.6.36 +and as far back as anyone cares to add it to stable. + +Thanks to Alexander Duyck and Jesse +Brandeburg for the suggestions on this one. + +Signed-off-by: Andy Gospodarek +CC: Alexander Duyck +CC: Jesse Brandeburg +Signed-off-by: Jay Vosburgh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/bonding/bond_3ad.c | 3 +++ + drivers/net/bonding/bond_alb.c | 3 +++ + 2 files changed, 6 insertions(+) + +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -2466,6 +2466,9 @@ int bond_3ad_lacpdu_recv(struct sk_buff + if (!(dev->flags & IFF_MASTER)) + goto out; + ++ if (!pskb_may_pull(skb, sizeof(struct lacpdu))) ++ goto out; ++ + read_lock(&bond->lock); + slave = bond_get_slave_by_dev((struct bonding *)netdev_priv(dev), + orig_dev); +--- a/drivers/net/bonding/bond_alb.c ++++ b/drivers/net/bonding/bond_alb.c +@@ -369,6 +369,9 @@ static int rlb_arp_recv(struct sk_buff * + goto out; + } + ++ if (!pskb_may_pull(skb, arp_hdr_len(bond_dev))) ++ goto out; ++ + if (skb->len < sizeof(struct arp_pkt)) { + pr_debug("Packet is too small to be an ARP\n"); + goto out; diff --git a/queue-2.6.35/bridge-clear-inet-control-block-of-skbs-passed-into-ip_fragment.patch b/queue-2.6.35/bridge-clear-inet-control-block-of-skbs-passed-into-ip_fragment.patch new file mode 100644 index 00000000000..cec4227fd88 --- /dev/null +++ b/queue-2.6.35/bridge-clear-inet-control-block-of-skbs-passed-into-ip_fragment.patch @@ -0,0 +1,41 @@ +From 29b17c3b0487c9d05affb9c50c76508fd73a7a63 Mon Sep 17 00:00:00 2001 +From: David S. Miller +Date: Wed, 1 Sep 2010 18:06:39 -0700 +Subject: bridge: Clear INET control block of SKBs passed into ip_fragment(). + + +From: David S. Miller + +[ Upstream commit 4ce6b9e1621c187a32a47a17bf6be93b1dc4a3df ] + +In a similar vein to commit 17762060c25590bfddd68cc1131f28ec720f405f +("bridge: Clear IPCB before possible entry into IP stack") + +Any time we call into the IP stack we have to make sure the state +there is as expected by the ipv4 code. + +With help from Eric Dumazet and Herbert Xu. + +Reported-by: Brandan Das +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_netfilter.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/bridge/br_netfilter.c ++++ b/net/bridge/br_netfilter.c +@@ -749,9 +749,11 @@ static int br_nf_dev_queue_xmit(struct s + { + if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && + skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && +- !skb_is_gso(skb)) ++ !skb_is_gso(skb)) { ++ /* BUG: Should really parse the IP options here. */ ++ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + return ip_fragment(skb, br_dev_queue_push_xmit); +- else ++ } else + return br_dev_queue_push_xmit(skb); + } + #else diff --git a/queue-2.6.35/drivers-net-cxgb3-cxgb3_main.c-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.35/drivers-net-cxgb3-cxgb3_main.c-prevent-reading-uninitialized-stack-memory.patch new file mode 100644 index 00000000000..0b004b3c305 --- /dev/null +++ b/queue-2.6.35/drivers-net-cxgb3-cxgb3_main.c-prevent-reading-uninitialized-stack-memory.patch @@ -0,0 +1,36 @@ +From 49c37c0334a9b85d30ab3d6b5d1acb05ef2ef6de Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg +Date: Wed, 15 Sep 2010 11:43:12 +0000 +Subject: drivers/net/cxgb3/cxgb3_main.c: prevent reading uninitialized stack memory + +From: Dan Rosenberg + +commit 49c37c0334a9b85d30ab3d6b5d1acb05ef2ef6de upstream. + +Fixed formatting (tabs and line breaks). + +The CHELSIO_GET_QSET_NUM device ioctl allows unprivileged users to read +4 bytes of uninitialized stack memory, because the "addr" member of the +ch_reg struct declared on the stack in cxgb_extension_ioctl() is not +altered or zeroed before being copied back to the user. This patch +takes care of it. + +Signed-off-by: Dan Rosenberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/cxgb3/cxgb3_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/cxgb3/cxgb3_main.c ++++ b/drivers/net/cxgb3/cxgb3_main.c +@@ -2296,6 +2296,8 @@ static int cxgb_extension_ioctl(struct n + case CHELSIO_GET_QSET_NUM:{ + struct ch_reg edata; + ++ memset(&edata, 0, sizeof(struct ch_reg)); ++ + edata.cmd = CHELSIO_GET_QSET_NUM; + edata.val = pi->nqsets; + if (copy_to_user(useraddr, &edata, sizeof(edata))) diff --git a/queue-2.6.35/drivers-net-eql.c-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.35/drivers-net-eql.c-prevent-reading-uninitialized-stack-memory.patch new file mode 100644 index 00000000000..ed6e6d1485b --- /dev/null +++ b/queue-2.6.35/drivers-net-eql.c-prevent-reading-uninitialized-stack-memory.patch @@ -0,0 +1,36 @@ +From 44467187dc22fdd33a1a06ea0ba86ce20be3fe3c Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg +Date: Wed, 15 Sep 2010 11:43:04 +0000 +Subject: drivers/net/eql.c: prevent reading uninitialized stack memory + +From: Dan Rosenberg + +commit 44467187dc22fdd33a1a06ea0ba86ce20be3fe3c upstream. + +Fixed formatting (tabs and line breaks). + +The EQL_GETMASTRCFG device ioctl allows unprivileged users to read 16 +bytes of uninitialized stack memory, because the "master_name" member of +the master_config_t struct declared on the stack in eql_g_master_cfg() +is not altered or zeroed before being copied back to the user. This +patch takes care of it. + +Signed-off-by: Dan Rosenberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/eql.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/eql.c ++++ b/drivers/net/eql.c +@@ -555,6 +555,8 @@ static int eql_g_master_cfg(struct net_d + equalizer_t *eql; + master_config_t mc; + ++ memset(&mc, 0, sizeof(master_config_t)); ++ + if (eql_is_master(dev)) { + eql = netdev_priv(dev); + mc.max_slaves = eql->max_slaves; diff --git a/queue-2.6.35/drivers-net-usb-hso.c-prevent-reading-uninitialized-memory.patch b/queue-2.6.35/drivers-net-usb-hso.c-prevent-reading-uninitialized-memory.patch new file mode 100644 index 00000000000..87e14e50d90 --- /dev/null +++ b/queue-2.6.35/drivers-net-usb-hso.c-prevent-reading-uninitialized-memory.patch @@ -0,0 +1,36 @@ +From 7011e660938fc44ed86319c18a5954e95a82ab3e Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg +Date: Wed, 15 Sep 2010 11:43:28 +0000 +Subject: drivers/net/usb/hso.c: prevent reading uninitialized memory + +From: Dan Rosenberg + +commit 7011e660938fc44ed86319c18a5954e95a82ab3e upstream. + +Fixed formatting (tabs and line breaks). + +The TIOCGICOUNT device ioctl allows unprivileged users to read +uninitialized stack memory, because the "reserved" member of the +serial_icounter_struct struct declared on the stack in hso_get_count() +is not altered or zeroed before being copied back to the user. This +patch takes care of it. + +Signed-off-by: Dan Rosenberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/usb/hso.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/usb/hso.c ++++ b/drivers/net/usb/hso.c +@@ -1653,6 +1653,8 @@ static int hso_get_count(struct hso_seri + struct uart_icount cnow; + struct hso_tiocmget *tiocmget = serial->tiocmget; + ++ memset(&icount, 0, sizeof(struct serial_icounter_struct)); ++ + if (!tiocmget) + return -ENOENT; + spin_lock_irq(&serial->serial_lock); diff --git a/queue-2.6.35/gro-fix-different-skb-headrooms.patch b/queue-2.6.35/gro-fix-different-skb-headrooms.patch new file mode 100644 index 00000000000..e704bf6ef85 --- /dev/null +++ b/queue-2.6.35/gro-fix-different-skb-headrooms.patch @@ -0,0 +1,68 @@ +From 1006c52235c334fed26dac15fd13a8e9b79d2845 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 1 Sep 2010 00:50:51 +0000 +Subject: gro: fix different skb headrooms + + +From: Eric Dumazet + +[ Upstream commit 3d3be4333fdf6faa080947b331a6a19bce1a4f57 ] + +Packets entering GRO might have different headrooms, even for a given +flow (because of implementation details in drivers, like copybreak). +We can't force drivers to deliver packets with a fixed headroom. + +1) fix skb_segment() + +skb_segment() makes the false assumption that headrooms of fragments are the +same as that of the head. When CHECKSUM_PARTIAL is used, this can give csum_start +errors, and crash later in skb_copy_and_csum_dev() + +2) allocate a minimal skb for head of frag_list + +skb_gro_receive() uses netdev_alloc_skb(headroom + skb_gro_offset(p)) to +allocate a fresh skb. This adds NET_SKB_PAD to a padding already +provided by netdevice, depending on various things, like copybreak. + +Use alloc_skb() to allocate an exact padding, to reduce cache line +needs: +NET_SKB_PAD + NET_IP_ALIGN + +bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=16626 + +Many thanks to Plamen Petrov, testing many debugging patches ! +With help of Jarek Poplawski. 
+ +Reported-by: Plamen Petrov +Signed-off-by: Eric Dumazet +CC: Jarek Poplawski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -2574,6 +2574,10 @@ struct sk_buff *skb_segment(struct sk_bu + __copy_skb_header(nskb, skb); + nskb->mac_len = skb->mac_len; + ++ /* nskb and skb might have different headroom */ ++ if (nskb->ip_summed == CHECKSUM_PARTIAL) ++ nskb->csum_start += skb_headroom(nskb) - headroom; ++ + skb_reset_mac_header(nskb); + skb_set_network_header(nskb, skb->mac_len); + nskb->transport_header = (nskb->network_header + +@@ -2703,8 +2707,8 @@ int skb_gro_receive(struct sk_buff **hea + } else if (skb_gro_len(p) != pinfo->gso_size) + return -E2BIG; + +- headroom = skb_headroom(p); +- nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); ++ headroom = NET_SKB_PAD + NET_IP_ALIGN; ++ nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); + if (unlikely(!nskb)) + return -ENOMEM; + diff --git a/queue-2.6.35/gro-re-fix-different-skb-headrooms.patch b/queue-2.6.35/gro-re-fix-different-skb-headrooms.patch new file mode 100644 index 00000000000..179e4d4e37e --- /dev/null +++ b/queue-2.6.35/gro-re-fix-different-skb-headrooms.patch @@ -0,0 +1,44 @@ +From b9dd9f07c077ce7410d2f5bd74fb3db8a9cd07fd Mon Sep 17 00:00:00 2001 +From: Jarek Poplawski +Date: Sat, 4 Sep 2010 10:34:29 +0000 +Subject: gro: Re-fix different skb headrooms + + +From: Jarek Poplawski + +[ Upstream commit 64289c8e6851bca0e589e064c9a5c9fbd6ae5dd4 ] + +The patch: "gro: fix different skb headrooms" in its part: +"2) allocate a minimal skb for head of frag_list" is buggy. The copied +skb has p->data set at the ip header at the moment, and skb_gro_offset +is the length of ip + tcp headers. So, after the change the length of +mac header is skipped. 
Later skb_set_mac_header() sets it into the +NET_SKB_PAD area (if it's long enough) and ip header is misaligned at +NET_SKB_PAD + NET_IP_ALIGN offset. There is no reason to assume the +original skb was wrongly allocated, so let's copy it as it was. + +bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=16626 +fixes commit: 3d3be4333fdf6faa080947b331a6a19bce1a4f57 + +Reported-by: Plamen Petrov +Signed-off-by: Jarek Poplawski +CC: Eric Dumazet +Acked-by: Eric Dumazet +Tested-by: Plamen Petrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -2707,7 +2707,7 @@ int skb_gro_receive(struct sk_buff **hea + } else if (skb_gro_len(p) != pinfo->gso_size) + return -E2BIG; + +- headroom = NET_SKB_PAD + NET_IP_ALIGN; ++ headroom = skb_headroom(p); + nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); + if (unlikely(!nskb)) + return -ENOMEM; diff --git a/queue-2.6.35/irda-correctly-clean-up-self-ias_obj-on-irda_bind-failure.patch b/queue-2.6.35/irda-correctly-clean-up-self-ias_obj-on-irda_bind-failure.patch new file mode 100644 index 00000000000..ad1c1772a40 --- /dev/null +++ b/queue-2.6.35/irda-correctly-clean-up-self-ias_obj-on-irda_bind-failure.patch @@ -0,0 +1,39 @@ +From 192fa287ebdf9d4acaae6197fb8205452a159ccb Mon Sep 17 00:00:00 2001 +From: David S. Miller +Date: Mon, 30 Aug 2010 18:35:24 -0700 +Subject: irda: Correctly clean up self->ias_obj on irda_bind() failure. + + +From: David S. Miller + +[ Upstream commit 628e300cccaa628d8fb92aa28cb7530a3d5f2257 ] + +If irda_open_tsap() fails, the irda_bind() code tries to destroy +the ->ias_obj object by hand, but does so wrongly. + +In particular, it fails to a) release the hashbin attached to the +object and b) reset the self->ias_obj pointer to NULL. 
+ +Fix both problems by using irias_delete_object() and explicitly +setting self->ias_obj to NULL, just as irda_release() does. + +Reported-by: Tavis Ormandy +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/irda/af_irda.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/irda/af_irda.c ++++ b/net/irda/af_irda.c +@@ -824,8 +824,8 @@ static int irda_bind(struct socket *sock + + err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name); + if (err < 0) { +- kfree(self->ias_obj->name); +- kfree(self->ias_obj); ++ irias_delete_object(self->ias_obj); ++ self->ias_obj = NULL; + goto out; + } + diff --git a/queue-2.6.35/l2tp-test-for-ethernet-header-in-l2tp_eth_dev_recv.patch b/queue-2.6.35/l2tp-test-for-ethernet-header-in-l2tp_eth_dev_recv.patch new file mode 100644 index 00000000000..8c8c81f3409 --- /dev/null +++ b/queue-2.6.35/l2tp-test-for-ethernet-header-in-l2tp_eth_dev_recv.patch @@ -0,0 +1,37 @@ +From c1debf1b013eabe9852ef9a61f51d99fbfed6e6a Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 25 Aug 2010 23:44:35 +0000 +Subject: l2tp: test for ethernet header in l2tp_eth_dev_recv() + + +From: Eric Dumazet + +[ Upstream commit bfc960a8eec023a170a80697fe65157cd4f44f81 ] + +close https://bugzilla.kernel.org/show_bug.cgi?id=16529 + +Before calling dev_forward_skb(), we should make sure skb head contains +at least an ethernet header, even if length included in upper layer said +so. Use pskb_may_pull() to make sure this ethernet header is present in +skb head. + +Reported-by: Thomas Heil +Reported-by: Ian Campbell +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_eth.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/l2tp/l2tp_eth.c ++++ b/net/l2tp/l2tp_eth.c +@@ -132,7 +132,7 @@ static void l2tp_eth_dev_recv(struct l2t + printk("\n"); + } + +- if (data_len < ETH_HLEN) ++ if (!pskb_may_pull(skb, ETH_HLEN)) + goto error; + + secpath_reset(skb); diff --git a/queue-2.6.35/net-blackhole-route-should-always-be-recalculated.patch b/queue-2.6.35/net-blackhole-route-should-always-be-recalculated.patch new file mode 100644 index 00000000000..b8062d17e43 --- /dev/null +++ b/queue-2.6.35/net-blackhole-route-should-always-be-recalculated.patch @@ -0,0 +1,47 @@ +From cc50691e9451c5782d3e11caee24d6b29815f567 Mon Sep 17 00:00:00 2001 +From: Jianzhao Wang +Date: Wed, 8 Sep 2010 14:35:43 -0700 +Subject: net: blackhole route should always be recalculated + + +From: Jianzhao Wang + +[ Upstream commit ae2688d59b5f861dc70a091d003773975d2ae7fb ] + +Blackhole routes are used when xfrm_lookup() returns -EREMOTE (error +triggered by IKE for example), hence this kind of route is always +temporary and so we should check if a better route exists for next +packets. +Bug has been introduced by commit d11a4dc18bf41719c9f0d7ed494d295dd2973b92. + +Signed-off-by: Jianzhao Wang +Signed-off-by: Nicolas Dichtel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2741,6 +2741,11 @@ slow_output: + + EXPORT_SYMBOL_GPL(__ip_route_output_key); + ++static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) ++{ ++ return NULL; ++} ++ + static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) + { + } +@@ -2749,7 +2754,7 @@ static struct dst_ops ipv4_dst_blackhole + .family = AF_INET, + .protocol = cpu_to_be16(ETH_P_IP), + .destroy = ipv4_dst_destroy, +- .check = ipv4_dst_check, ++ .check = ipv4_blackhole_dst_check, + .update_pmtu = ipv4_rt_blackhole_update_pmtu, + .entries = ATOMIC_INIT(0), + }; diff --git a/queue-2.6.35/net-rps-needs-to-depend-upon-use_generic_smp_helpers.patch b/queue-2.6.35/net-rps-needs-to-depend-upon-use_generic_smp_helpers.patch new file mode 100644 index 00000000000..7ee3c9b94ad --- /dev/null +++ b/queue-2.6.35/net-rps-needs-to-depend-upon-use_generic_smp_helpers.patch @@ -0,0 +1,31 @@ +From b8342a5f5557f5037913bb9d5210cce98b0fdef4 Mon Sep 17 00:00:00 2001 +From: David S. Miller +Date: Tue, 14 Sep 2010 21:41:20 -0700 +Subject: net: RPS needs to depend upon USE_GENERIC_SMP_HELPERS + + +From: David S. Miller + +[ Upstream commit 6dcbc12290abb452a5e42713faa6461b248e2f55 ] + +You cannot invoke __smp_call_function_single() unless the +architecture sets this symbol. + +Reported-by: Daniel Hellstrom +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -206,7 +206,7 @@ source "net/dcb/Kconfig" + + config RPS + boolean +- depends on SMP && SYSFS ++ depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + default y + + menu "Network testing" diff --git a/queue-2.6.35/rds-fix-a-leak-of-kernel-memory.patch b/queue-2.6.35/rds-fix-a-leak-of-kernel-memory.patch new file mode 100644 index 00000000000..268a139cb5a --- /dev/null +++ b/queue-2.6.35/rds-fix-a-leak-of-kernel-memory.patch @@ -0,0 +1,32 @@ +From 7c70a5a1589a7ad110c15b0d80635cc925e84cb2 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 16 Aug 2010 03:25:00 +0000 +Subject: rds: fix a leak of kernel memory + + +From: Eric Dumazet + +[ Upstream commit f037590fff3005ce8a1513858d7d44f50053cc8f ] + +struct rds_rdma_notify contains a 32 bits hole on 64bit arches, +make sure it is zeroed before copying it to user. + +Signed-off-by: Eric Dumazet +CC: Andy Grover +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rds/recv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/rds/recv.c ++++ b/net/rds/recv.c +@@ -297,7 +297,7 @@ static int rds_still_queued(struct rds_s + int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) + { + struct rds_notifier *notifier; +- struct rds_rdma_notify cmsg; ++ struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */ + unsigned int count = 0, max_messages = ~0U; + unsigned long flags; + LIST_HEAD(copy); diff --git a/queue-2.6.35/sparc64-get-rid-of-indirect-p1275-prom-call-buffer.patch b/queue-2.6.35/sparc64-get-rid-of-indirect-p1275-prom-call-buffer.patch new file mode 100644 index 00000000000..ef8ee9eb8ea --- /dev/null +++ b/queue-2.6.35/sparc64-get-rid-of-indirect-p1275-prom-call-buffer.patch @@ -0,0 +1,1235 @@ +From ccf013f24df45854a358c8c53f7fa87a39f795d0 Mon Sep 17 00:00:00 2001 +From: David S. 
Miller +Date: Mon, 23 Aug 2010 23:10:57 -0700 +Subject: sparc64: Get rid of indirect p1275 PROM call buffer. + + +From: David S. Miller + +[ Upstream commit 25edd6946a1d74e5e77813c2324a0908c68bcf9e ] + +This is based upon a report by Meelis Roos showing that it's possible +that we'll try to fetch a property that is 32K in size with some +devices. With the current fixed 3K buffer we use for moving data in +and out of the firmware during PROM calls, that simply won't work. + +In fact, it will scramble random kernel data during bootup. + +The reasoning behind the temporary buffer is entirely historical. It +used to be the case that we had problems referencing dynamic kernel +memory (including the stack) early in the boot process before we +explicitly told the firmware to switch us over to the kernel trap +table. + +So what we did was always give the firmware buffers that were locked +into the main kernel image. + +But we no longer have problems like that, so get rid of all of this +indirect bounce buffering. + +Besides fixing Meelis's bug, this also makes the kernel data about 3K +smaller. + +It was also discovered during these conversions that the +implementation of prom_retain() was completely wrong, so that was +fixed here as well. Currently that interface is not in use. + +Reported-by: Meelis Roos +Tested-by: Meelis Roos +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/oplib_64.h | 27 --- + arch/sparc/prom/cif.S | 16 - + arch/sparc/prom/console_64.c | 48 ++++- + arch/sparc/prom/devops_64.c | 36 +++- + arch/sparc/prom/misc_64.c | 314 ++++++++++++++++++++++++++------------ + arch/sparc/prom/p1275.c | 102 ------------ + arch/sparc/prom/tree_64.c | 210 ++++++++++++++++++------- + 7 files changed, 456 insertions(+), 297 deletions(-) + +--- a/arch/sparc/include/asm/oplib_64.h ++++ b/arch/sparc/include/asm/oplib_64.h +@@ -185,9 +185,8 @@ extern int prom_getunumber(int syndrome_ + char *buf, int buflen); + + /* Retain physical memory to the caller across soft resets. */ +-extern unsigned long prom_retain(const char *name, +- unsigned long pa_low, unsigned long pa_high, +- long size, long align); ++extern int prom_retain(const char *name, unsigned long size, ++ unsigned long align, unsigned long *paddr); + + /* Load explicit I/D TLB entries into the calling processor. */ + extern long prom_itlb_load(unsigned long index, +@@ -287,26 +286,6 @@ extern void prom_sun4v_guest_soft_state( + extern int prom_ihandle2path(int handle, char *buffer, int bufsize); + + /* Client interface level routines. 
*/ +-extern long p1275_cmd(const char *, long, ...); +- +-#if 0 +-#define P1275_SIZE(x) ((((long)((x) / 32)) << 32) | (x)) +-#else +-#define P1275_SIZE(x) x +-#endif +- +-/* We support at most 16 input and 1 output argument */ +-#define P1275_ARG_NUMBER 0 +-#define P1275_ARG_IN_STRING 1 +-#define P1275_ARG_OUT_BUF 2 +-#define P1275_ARG_OUT_32B 3 +-#define P1275_ARG_IN_FUNCTION 4 +-#define P1275_ARG_IN_BUF 5 +-#define P1275_ARG_IN_64B 6 +- +-#define P1275_IN(x) ((x) & 0xf) +-#define P1275_OUT(x) (((x) << 4) & 0xf0) +-#define P1275_INOUT(i,o) (P1275_IN(i)|P1275_OUT(o)) +-#define P1275_ARG(n,x) ((x) << ((n)*3 + 8)) ++extern void p1275_cmd_direct(unsigned long *); + + #endif /* !(__SPARC64_OPLIB_H) */ +--- a/arch/sparc/prom/cif.S ++++ b/arch/sparc/prom/cif.S +@@ -9,18 +9,18 @@ + #include + + .text +- .globl prom_cif_interface +-prom_cif_interface: +- sethi %hi(p1275buf), %o0 +- or %o0, %lo(p1275buf), %o0 +- ldx [%o0 + 0x010], %o1 ! prom_cif_stack +- save %o1, -192, %sp +- ldx [%i0 + 0x008], %l2 ! prom_cif_handler ++ .globl prom_cif_direct ++prom_cif_direct: ++ sethi %hi(p1275buf), %o1 ++ or %o1, %lo(p1275buf), %o1 ++ ldx [%o1 + 0x0010], %o2 ! prom_cif_stack ++ save %o2, -192, %sp ++ ldx [%i1 + 0x0008], %l2 ! prom_cif_handler + mov %g4, %l0 + mov %g5, %l1 + mov %g6, %l3 + call %l2 +- add %i0, 0x018, %o0 ! prom_args ++ mov %i0, %o0 ! 
prom_args + mov %l0, %g4 + mov %l1, %g5 + mov %l3, %g6 +--- a/arch/sparc/prom/console_64.c ++++ b/arch/sparc/prom/console_64.c +@@ -21,14 +21,22 @@ extern int prom_stdin, prom_stdout; + inline int + prom_nbgetchar(void) + { ++ unsigned long args[7]; + char inc; + +- if (p1275_cmd("read", P1275_ARG(1,P1275_ARG_OUT_BUF)| +- P1275_INOUT(3,1), +- prom_stdin, &inc, P1275_SIZE(1)) == 1) ++ args[0] = (unsigned long) "read"; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) prom_stdin; ++ args[4] = (unsigned long) &inc; ++ args[5] = 1; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ if (args[6] == 1) + return inc; +- else +- return -1; ++ return -1; + } + + /* Non blocking put character to console device, returns -1 if +@@ -37,12 +45,22 @@ prom_nbgetchar(void) + inline int + prom_nbputchar(char c) + { ++ unsigned long args[7]; + char outc; + + outc = c; +- if (p1275_cmd("write", P1275_ARG(1,P1275_ARG_IN_BUF)| +- P1275_INOUT(3,1), +- prom_stdout, &outc, P1275_SIZE(1)) == 1) ++ ++ args[0] = (unsigned long) "write"; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) prom_stdout; ++ args[4] = (unsigned long) &outc; ++ args[5] = 1; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ if (args[6] == 1) + return 0; + else + return -1; +@@ -67,7 +85,15 @@ prom_putchar(char c) + void + prom_puts(const char *s, int len) + { +- p1275_cmd("write", P1275_ARG(1,P1275_ARG_IN_BUF)| +- P1275_INOUT(3,1), +- prom_stdout, s, P1275_SIZE(len)); ++ unsigned long args[7]; ++ ++ args[0] = (unsigned long) "write"; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) prom_stdout; ++ args[4] = (unsigned long) s; ++ args[5] = len; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); + } +--- a/arch/sparc/prom/devops_64.c ++++ b/arch/sparc/prom/devops_64.c +@@ -18,16 +18,32 @@ + int + prom_devopen(const char *dstr) + { +- return p1275_cmd ("open", P1275_ARG(0,P1275_ARG_IN_STRING)| +- P1275_INOUT(1,1), +- dstr); ++ unsigned long 
args[5]; ++ ++ args[0] = (unsigned long) "open"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned long) dstr; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[4]; + } + + /* Close the device described by device handle 'dhandle'. */ + int + prom_devclose(int dhandle) + { +- p1275_cmd ("close", P1275_INOUT(1,0), dhandle); ++ unsigned long args[4]; ++ ++ args[0] = (unsigned long) "close"; ++ args[1] = 1; ++ args[2] = 0; ++ args[3] = (unsigned int) dhandle; ++ ++ p1275_cmd_direct(args); ++ + return 0; + } + +@@ -37,5 +53,15 @@ prom_devclose(int dhandle) + void + prom_seek(int dhandle, unsigned int seekhi, unsigned int seeklo) + { +- p1275_cmd ("seek", P1275_INOUT(3,1), dhandle, seekhi, seeklo); ++ unsigned long args[7]; ++ ++ args[0] = (unsigned long) "seek"; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) dhandle; ++ args[4] = seekhi; ++ args[5] = seeklo; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); + } +--- a/arch/sparc/prom/misc_64.c ++++ b/arch/sparc/prom/misc_64.c +@@ -20,10 +20,17 @@ + + int prom_service_exists(const char *service_name) + { +- int err = p1275_cmd("test", P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_INOUT(1, 1), service_name); ++ unsigned long args[5]; + +- if (err) ++ args[0] = (unsigned long) "test"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned long) service_name; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ if (args[4]) + return 0; + return 1; + } +@@ -31,30 +38,47 @@ int prom_service_exists(const char *serv + void prom_sun4v_guest_soft_state(void) + { + const char *svc = "SUNW,soft-state-supported"; ++ unsigned long args[3]; + + if (!prom_service_exists(svc)) + return; +- p1275_cmd(svc, P1275_INOUT(0, 0)); ++ args[0] = (unsigned long) svc; ++ args[1] = 0; ++ args[2] = 0; ++ p1275_cmd_direct(args); + } + + /* Reset and reboot the machine with the command 'bcommand'. 
*/ + void prom_reboot(const char *bcommand) + { ++ unsigned long args[4]; ++ + #ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) + ldom_reboot(bcommand); + #endif +- p1275_cmd("boot", P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_INOUT(1, 0), bcommand); ++ args[0] = (unsigned long) "boot"; ++ args[1] = 1; ++ args[2] = 0; ++ args[3] = (unsigned long) bcommand; ++ ++ p1275_cmd_direct(args); + } + + /* Forth evaluate the expression contained in 'fstring'. */ + void prom_feval(const char *fstring) + { ++ unsigned long args[5]; ++ + if (!fstring || fstring[0] == 0) + return; +- p1275_cmd("interpret", P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_INOUT(1, 1), fstring); ++ args[0] = (unsigned long) "interpret"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned long) fstring; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); + } + EXPORT_SYMBOL(prom_feval); + +@@ -68,6 +92,7 @@ extern void smp_release(void); + */ + void prom_cmdline(void) + { ++ unsigned long args[3]; + unsigned long flags; + + local_irq_save(flags); +@@ -76,7 +101,11 @@ void prom_cmdline(void) + smp_capture(); + #endif + +- p1275_cmd("enter", P1275_INOUT(0, 0)); ++ args[0] = (unsigned long) "enter"; ++ args[1] = 0; ++ args[2] = 0; ++ ++ p1275_cmd_direct(args); + + #ifdef CONFIG_SMP + smp_release(); +@@ -90,22 +119,32 @@ void prom_cmdline(void) + */ + void notrace prom_halt(void) + { ++ unsigned long args[3]; ++ + #ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) + ldom_power_off(); + #endif + again: +- p1275_cmd("exit", P1275_INOUT(0, 0)); ++ args[0] = (unsigned long) "exit"; ++ args[1] = 0; ++ args[2] = 0; ++ p1275_cmd_direct(args); + goto again; /* PROM is out to get me -DaveM */ + } + + void prom_halt_power_off(void) + { ++ unsigned long args[3]; ++ + #ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) + ldom_power_off(); + #endif +- p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0)); ++ args[0] = (unsigned long) "SUNW,power-off"; ++ args[1] = 0; ++ args[2] = 0; ++ 
p1275_cmd_direct(args); + + /* if nothing else helps, we just halt */ + prom_halt(); +@@ -114,10 +153,15 @@ void prom_halt_power_off(void) + /* Set prom sync handler to call function 'funcp'. */ + void prom_setcallback(callback_func_t funcp) + { ++ unsigned long args[5]; + if (!funcp) + return; +- p1275_cmd("set-callback", P1275_ARG(0, P1275_ARG_IN_FUNCTION) | +- P1275_INOUT(1, 1), funcp); ++ args[0] = (unsigned long) "set-callback"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned long) funcp; ++ args[4] = (unsigned long) -1; ++ p1275_cmd_direct(args); + } + + /* Get the idprom and stuff it into buffer 'idbuf'. Returns the +@@ -173,57 +217,61 @@ static int prom_get_memory_ihandle(void) + } + + /* Load explicit I/D TLB entries. */ ++static long tlb_load(const char *type, unsigned long index, ++ unsigned long tte_data, unsigned long vaddr) ++{ ++ unsigned long args[9]; ++ ++ args[0] = (unsigned long) prom_callmethod_name; ++ args[1] = 5; ++ args[2] = 1; ++ args[3] = (unsigned long) type; ++ args[4] = (unsigned int) prom_get_mmu_ihandle(); ++ args[5] = vaddr; ++ args[6] = tte_data; ++ args[7] = index; ++ args[8] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (long) args[8]; ++} ++ + long prom_itlb_load(unsigned long index, + unsigned long tte_data, + unsigned long vaddr) + { +- return p1275_cmd(prom_callmethod_name, +- (P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_ARG(2, P1275_ARG_IN_64B) | +- P1275_ARG(3, P1275_ARG_IN_64B) | +- P1275_INOUT(5, 1)), +- "SUNW,itlb-load", +- prom_get_mmu_ihandle(), +- /* And then our actual args are pushed backwards. 
*/ +- vaddr, +- tte_data, +- index); ++ return tlb_load("SUNW,itlb-load", index, tte_data, vaddr); + } + + long prom_dtlb_load(unsigned long index, + unsigned long tte_data, + unsigned long vaddr) + { +- return p1275_cmd(prom_callmethod_name, +- (P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_ARG(2, P1275_ARG_IN_64B) | +- P1275_ARG(3, P1275_ARG_IN_64B) | +- P1275_INOUT(5, 1)), +- "SUNW,dtlb-load", +- prom_get_mmu_ihandle(), +- /* And then our actual args are pushed backwards. */ +- vaddr, +- tte_data, +- index); ++ return tlb_load("SUNW,dtlb-load", index, tte_data, vaddr); + } + + int prom_map(int mode, unsigned long size, + unsigned long vaddr, unsigned long paddr) + { +- int ret = p1275_cmd(prom_callmethod_name, +- (P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_ARG(3, P1275_ARG_IN_64B) | +- P1275_ARG(4, P1275_ARG_IN_64B) | +- P1275_ARG(6, P1275_ARG_IN_64B) | +- P1275_INOUT(7, 1)), +- prom_map_name, +- prom_get_mmu_ihandle(), +- mode, +- size, +- vaddr, +- 0, +- paddr); ++ unsigned long args[11]; ++ int ret; + ++ args[0] = (unsigned long) prom_callmethod_name; ++ args[1] = 7; ++ args[2] = 1; ++ args[3] = (unsigned long) prom_map_name; ++ args[4] = (unsigned int) prom_get_mmu_ihandle(); ++ args[5] = (unsigned int) mode; ++ args[6] = size; ++ args[7] = vaddr; ++ args[8] = 0; ++ args[9] = paddr; ++ args[10] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ ret = (int) args[10]; + if (ret == 0) + ret = -1; + return ret; +@@ -231,40 +279,51 @@ int prom_map(int mode, unsigned long siz + + void prom_unmap(unsigned long size, unsigned long vaddr) + { +- p1275_cmd(prom_callmethod_name, +- (P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_ARG(2, P1275_ARG_IN_64B) | +- P1275_ARG(3, P1275_ARG_IN_64B) | +- P1275_INOUT(4, 0)), +- prom_unmap_name, +- prom_get_mmu_ihandle(), +- size, +- vaddr); ++ unsigned long args[7]; ++ ++ args[0] = (unsigned long) prom_callmethod_name; ++ args[1] = 4; ++ args[2] = 0; ++ args[3] = (unsigned long) prom_unmap_name; ++ args[4] = (unsigned int) 
prom_get_mmu_ihandle(); ++ args[5] = size; ++ args[6] = vaddr; ++ ++ p1275_cmd_direct(args); + } + + /* Set aside physical memory which is not touched or modified + * across soft resets. + */ +-unsigned long prom_retain(const char *name, +- unsigned long pa_low, unsigned long pa_high, +- long size, long align) +-{ +- /* XXX I don't think we return multiple values correctly. +- * XXX OBP supposedly returns pa_low/pa_high here, how does +- * XXX it work? +- */ ++int prom_retain(const char *name, unsigned long size, ++ unsigned long align, unsigned long *paddr) ++{ ++ unsigned long args[11]; + +- /* If align is zero, the pa_low/pa_high args are passed, +- * else they are not. ++ args[0] = (unsigned long) prom_callmethod_name; ++ args[1] = 5; ++ args[2] = 3; ++ args[3] = (unsigned long) "SUNW,retain"; ++ args[4] = (unsigned int) prom_get_memory_ihandle(); ++ args[5] = align; ++ args[6] = size; ++ args[7] = (unsigned long) name; ++ args[8] = (unsigned long) -1; ++ args[9] = (unsigned long) -1; ++ args[10] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ if (args[8]) ++ return (int) args[8]; ++ ++ /* Next we get "phys_high" then "phys_low". On 64-bit ++ * the phys_high cell is don't care since the phys_low ++ * cell has the full value. 
+ */ +- if (align == 0) +- return p1275_cmd("SUNW,retain", +- (P1275_ARG(0, P1275_ARG_IN_BUF) | P1275_INOUT(5, 2)), +- name, pa_low, pa_high, size, align); +- else +- return p1275_cmd("SUNW,retain", +- (P1275_ARG(0, P1275_ARG_IN_BUF) | P1275_INOUT(3, 2)), +- name, size, align); ++ *paddr = args[10]; ++ ++ return 0; + } + + /* Get "Unumber" string for the SIMM at the given +@@ -277,62 +336,129 @@ int prom_getunumber(int syndrome_code, + unsigned long phys_addr, + char *buf, int buflen) + { +- return p1275_cmd(prom_callmethod_name, +- (P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_ARG(3, P1275_ARG_OUT_BUF) | +- P1275_ARG(6, P1275_ARG_IN_64B) | +- P1275_INOUT(8, 2)), +- "SUNW,get-unumber", prom_get_memory_ihandle(), +- buflen, buf, P1275_SIZE(buflen), +- 0, phys_addr, syndrome_code); ++ unsigned long args[12]; ++ ++ args[0] = (unsigned long) prom_callmethod_name; ++ args[1] = 7; ++ args[2] = 2; ++ args[3] = (unsigned long) "SUNW,get-unumber"; ++ args[4] = (unsigned int) prom_get_memory_ihandle(); ++ args[5] = buflen; ++ args[6] = (unsigned long) buf; ++ args[7] = 0; ++ args[8] = phys_addr; ++ args[9] = (unsigned int) syndrome_code; ++ args[10] = (unsigned long) -1; ++ args[11] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[10]; + } + + /* Power management extensions. 
*/ + void prom_sleepself(void) + { +- p1275_cmd("SUNW,sleep-self", P1275_INOUT(0, 0)); ++ unsigned long args[3]; ++ ++ args[0] = (unsigned long) "SUNW,sleep-self"; ++ args[1] = 0; ++ args[2] = 0; ++ p1275_cmd_direct(args); + } + + int prom_sleepsystem(void) + { +- return p1275_cmd("SUNW,sleep-system", P1275_INOUT(0, 1)); ++ unsigned long args[4]; ++ ++ args[0] = (unsigned long) "SUNW,sleep-system"; ++ args[1] = 0; ++ args[2] = 1; ++ args[3] = (unsigned long) -1; ++ p1275_cmd_direct(args); ++ ++ return (int) args[3]; + } + + int prom_wakeupsystem(void) + { +- return p1275_cmd("SUNW,wakeup-system", P1275_INOUT(0, 1)); ++ unsigned long args[4]; ++ ++ args[0] = (unsigned long) "SUNW,wakeup-system"; ++ args[1] = 0; ++ args[2] = 1; ++ args[3] = (unsigned long) -1; ++ p1275_cmd_direct(args); ++ ++ return (int) args[3]; + } + + #ifdef CONFIG_SMP + void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg) + { +- p1275_cmd("SUNW,start-cpu", P1275_INOUT(3, 0), cpunode, pc, arg); ++ unsigned long args[6]; ++ ++ args[0] = (unsigned long) "SUNW,start-cpu"; ++ args[1] = 3; ++ args[2] = 0; ++ args[3] = (unsigned int) cpunode; ++ args[4] = pc; ++ args[5] = arg; ++ p1275_cmd_direct(args); + } + + void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg) + { +- p1275_cmd("SUNW,start-cpu-by-cpuid", P1275_INOUT(3, 0), +- cpuid, pc, arg); ++ unsigned long args[6]; ++ ++ args[0] = (unsigned long) "SUNW,start-cpu-by-cpuid"; ++ args[1] = 3; ++ args[2] = 0; ++ args[3] = (unsigned int) cpuid; ++ args[4] = pc; ++ args[5] = arg; ++ p1275_cmd_direct(args); + } + + void prom_stopcpu_cpuid(int cpuid) + { +- p1275_cmd("SUNW,stop-cpu-by-cpuid", P1275_INOUT(1, 0), +- cpuid); ++ unsigned long args[4]; ++ ++ args[0] = (unsigned long) "SUNW,stop-cpu-by-cpuid"; ++ args[1] = 1; ++ args[2] = 0; ++ args[3] = (unsigned int) cpuid; ++ p1275_cmd_direct(args); + } + + void prom_stopself(void) + { +- p1275_cmd("SUNW,stop-self", P1275_INOUT(0, 0)); ++ unsigned long args[3]; ++ ++ 
args[0] = (unsigned long) "SUNW,stop-self"; ++ args[1] = 0; ++ args[2] = 0; ++ p1275_cmd_direct(args); + } + + void prom_idleself(void) + { +- p1275_cmd("SUNW,idle-self", P1275_INOUT(0, 0)); ++ unsigned long args[3]; ++ ++ args[0] = (unsigned long) "SUNW,idle-self"; ++ args[1] = 0; ++ args[2] = 0; ++ p1275_cmd_direct(args); + } + + void prom_resumecpu(int cpunode) + { +- p1275_cmd("SUNW,resume-cpu", P1275_INOUT(1, 0), cpunode); ++ unsigned long args[4]; ++ ++ args[0] = (unsigned long) "SUNW,resume-cpu"; ++ args[1] = 1; ++ args[2] = 0; ++ args[3] = (unsigned int) cpunode; ++ p1275_cmd_direct(args); + } + #endif +--- a/arch/sparc/prom/p1275.c ++++ b/arch/sparc/prom/p1275.c +@@ -22,13 +22,11 @@ struct { + long prom_callback; /* 0x00 */ + void (*prom_cif_handler)(long *); /* 0x08 */ + unsigned long prom_cif_stack; /* 0x10 */ +- unsigned long prom_args [23]; /* 0x18 */ +- char prom_buffer [3000]; + } p1275buf; + + extern void prom_world(int); + +-extern void prom_cif_interface(void); ++extern void prom_cif_direct(unsigned long *args); + extern void prom_cif_callback(void); + + /* +@@ -36,114 +34,20 @@ extern void prom_cif_callback(void); + */ + DEFINE_RAW_SPINLOCK(prom_entry_lock); + +-long p1275_cmd(const char *service, long fmt, ...) 
++void p1275_cmd_direct(unsigned long *args) + { +- char *p, *q; + unsigned long flags; +- int nargs, nrets, i; +- va_list list; +- long attrs, x; +- +- p = p1275buf.prom_buffer; + + raw_local_save_flags(flags); + raw_local_irq_restore(PIL_NMI); + raw_spin_lock(&prom_entry_lock); + +- p1275buf.prom_args[0] = (unsigned long)p; /* service */ +- strcpy (p, service); +- p = (char *)(((long)(strchr (p, 0) + 8)) & ~7); +- p1275buf.prom_args[1] = nargs = (fmt & 0x0f); /* nargs */ +- p1275buf.prom_args[2] = nrets = ((fmt & 0xf0) >> 4); /* nrets */ +- attrs = fmt >> 8; +- va_start(list, fmt); +- for (i = 0; i < nargs; i++, attrs >>= 3) { +- switch (attrs & 0x7) { +- case P1275_ARG_NUMBER: +- p1275buf.prom_args[i + 3] = +- (unsigned)va_arg(list, long); +- break; +- case P1275_ARG_IN_64B: +- p1275buf.prom_args[i + 3] = +- va_arg(list, unsigned long); +- break; +- case P1275_ARG_IN_STRING: +- strcpy (p, va_arg(list, char *)); +- p1275buf.prom_args[i + 3] = (unsigned long)p; +- p = (char *)(((long)(strchr (p, 0) + 8)) & ~7); +- break; +- case P1275_ARG_OUT_BUF: +- (void) va_arg(list, char *); +- p1275buf.prom_args[i + 3] = (unsigned long)p; +- x = va_arg(list, long); +- i++; attrs >>= 3; +- p = (char *)(((long)(p + (int)x + 7)) & ~7); +- p1275buf.prom_args[i + 3] = x; +- break; +- case P1275_ARG_IN_BUF: +- q = va_arg(list, char *); +- p1275buf.prom_args[i + 3] = (unsigned long)p; +- x = va_arg(list, long); +- i++; attrs >>= 3; +- memcpy (p, q, (int)x); +- p = (char *)(((long)(p + (int)x + 7)) & ~7); +- p1275buf.prom_args[i + 3] = x; +- break; +- case P1275_ARG_OUT_32B: +- (void) va_arg(list, char *); +- p1275buf.prom_args[i + 3] = (unsigned long)p; +- p += 32; +- break; +- case P1275_ARG_IN_FUNCTION: +- p1275buf.prom_args[i + 3] = +- (unsigned long)prom_cif_callback; +- p1275buf.prom_callback = va_arg(list, long); +- break; +- } +- } +- va_end(list); +- + prom_world(1); +- prom_cif_interface(); ++ prom_cif_direct(args); + prom_world(0); + +- attrs = fmt >> 8; +- va_start(list, 
fmt); +- for (i = 0; i < nargs; i++, attrs >>= 3) { +- switch (attrs & 0x7) { +- case P1275_ARG_NUMBER: +- (void) va_arg(list, long); +- break; +- case P1275_ARG_IN_STRING: +- (void) va_arg(list, char *); +- break; +- case P1275_ARG_IN_FUNCTION: +- (void) va_arg(list, long); +- break; +- case P1275_ARG_IN_BUF: +- (void) va_arg(list, char *); +- (void) va_arg(list, long); +- i++; attrs >>= 3; +- break; +- case P1275_ARG_OUT_BUF: +- p = va_arg(list, char *); +- x = va_arg(list, long); +- memcpy (p, (char *)(p1275buf.prom_args[i + 3]), (int)x); +- i++; attrs >>= 3; +- break; +- case P1275_ARG_OUT_32B: +- p = va_arg(list, char *); +- memcpy (p, (char *)(p1275buf.prom_args[i + 3]), 32); +- break; +- } +- } +- va_end(list); +- x = p1275buf.prom_args [nargs + 3]; +- + raw_spin_unlock(&prom_entry_lock); + raw_local_irq_restore(flags); +- +- return x; + } + + void prom_cif_init(void *cif_handler, void *cif_stack) +--- a/arch/sparc/prom/tree_64.c ++++ b/arch/sparc/prom/tree_64.c +@@ -16,22 +16,39 @@ + #include + #include + ++static int prom_node_to_node(const char *type, int node) ++{ ++ unsigned long args[5]; ++ ++ args[0] = (unsigned long) type; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[4]; ++} ++ + /* Return the child of node 'node' or zero if no this node has no + * direct descendent. 
+ */ + inline int __prom_getchild(int node) + { +- return p1275_cmd ("child", P1275_INOUT(1, 1), node); ++ return prom_node_to_node("child", node); + } + + inline int prom_getchild(int node) + { + int cnode; + +- if(node == -1) return 0; ++ if (node == -1) ++ return 0; + cnode = __prom_getchild(node); +- if(cnode == -1) return 0; +- return (int)cnode; ++ if (cnode == -1) ++ return 0; ++ return cnode; + } + EXPORT_SYMBOL(prom_getchild); + +@@ -39,10 +56,12 @@ inline int prom_getparent(int node) + { + int cnode; + +- if(node == -1) return 0; +- cnode = p1275_cmd ("parent", P1275_INOUT(1, 1), node); +- if(cnode == -1) return 0; +- return (int)cnode; ++ if (node == -1) ++ return 0; ++ cnode = prom_node_to_node("parent", node); ++ if (cnode == -1) ++ return 0; ++ return cnode; + } + + /* Return the next sibling of node 'node' or zero if no more siblings +@@ -50,7 +69,7 @@ inline int prom_getparent(int node) + */ + inline int __prom_getsibling(int node) + { +- return p1275_cmd(prom_peer_name, P1275_INOUT(1, 1), node); ++ return prom_node_to_node(prom_peer_name, node); + } + + inline int prom_getsibling(int node) +@@ -72,11 +91,21 @@ EXPORT_SYMBOL(prom_getsibling); + */ + inline int prom_getproplen(int node, const char *prop) + { +- if((!node) || (!prop)) return -1; +- return p1275_cmd ("getproplen", +- P1275_ARG(1,P1275_ARG_IN_STRING)| +- P1275_INOUT(2, 1), +- node, prop); ++ unsigned long args[6]; ++ ++ if (!node || !prop) ++ return -1; ++ ++ args[0] = (unsigned long) "getproplen"; ++ args[1] = 2; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = (unsigned long) prop; ++ args[5] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[5]; + } + EXPORT_SYMBOL(prom_getproplen); + +@@ -87,19 +116,25 @@ EXPORT_SYMBOL(prom_getproplen); + inline int prom_getproperty(int node, const char *prop, + char *buffer, int bufsize) + { ++ unsigned long args[8]; + int plen; + + plen = prom_getproplen(node, prop); +- if ((plen > bufsize) || (plen == 0) || 
(plen == -1)) { ++ if ((plen > bufsize) || (plen == 0) || (plen == -1)) + return -1; +- } else { +- /* Ok, things seem all right. */ +- return p1275_cmd(prom_getprop_name, +- P1275_ARG(1,P1275_ARG_IN_STRING)| +- P1275_ARG(2,P1275_ARG_OUT_BUF)| +- P1275_INOUT(4, 1), +- node, prop, buffer, P1275_SIZE(plen)); +- } ++ ++ args[0] = (unsigned long) prom_getprop_name; ++ args[1] = 4; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = (unsigned long) prop; ++ args[5] = (unsigned long) buffer; ++ args[6] = bufsize; ++ args[7] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[7]; + } + EXPORT_SYMBOL(prom_getproperty); + +@@ -110,7 +145,7 @@ inline int prom_getint(int node, const c + { + int intprop; + +- if(prom_getproperty(node, prop, (char *) &intprop, sizeof(int)) != -1) ++ if (prom_getproperty(node, prop, (char *) &intprop, sizeof(int)) != -1) + return intprop; + + return -1; +@@ -126,7 +161,8 @@ int prom_getintdefault(int node, const c + int retval; + + retval = prom_getint(node, property); +- if(retval == -1) return deflt; ++ if (retval == -1) ++ return deflt; + + return retval; + } +@@ -138,7 +174,8 @@ int prom_getbool(int node, const char *p + int retval; + + retval = prom_getproplen(node, prop); +- if(retval == -1) return 0; ++ if (retval == -1) ++ return 0; + return 1; + } + EXPORT_SYMBOL(prom_getbool); +@@ -152,7 +189,8 @@ void prom_getstring(int node, const char + int len; + + len = prom_getproperty(node, prop, user_buf, ubuf_size); +- if(len != -1) return; ++ if (len != -1) ++ return; + user_buf[0] = 0; + } + EXPORT_SYMBOL(prom_getstring); +@@ -164,7 +202,8 @@ int prom_nodematch(int node, const char + { + char namebuf[128]; + prom_getproperty(node, "name", namebuf, sizeof(namebuf)); +- if(strcmp(namebuf, name) == 0) return 1; ++ if (strcmp(namebuf, name) == 0) ++ return 1; + return 0; + } + +@@ -190,16 +229,29 @@ int prom_searchsiblings(int node_start, + } + EXPORT_SYMBOL(prom_searchsiblings); + ++static const char 
*prom_nextprop_name = "nextprop"; ++ + /* Return the first property type for node 'node'. + * buffer should be at least 32B in length + */ + inline char *prom_firstprop(int node, char *buffer) + { ++ unsigned long args[7]; ++ + *buffer = 0; +- if(node == -1) return buffer; +- p1275_cmd ("nextprop", P1275_ARG(2,P1275_ARG_OUT_32B)| +- P1275_INOUT(3, 0), +- node, (char *) 0x0, buffer); ++ if (node == -1) ++ return buffer; ++ ++ args[0] = (unsigned long) prom_nextprop_name; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = 0; ++ args[5] = (unsigned long) buffer; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ + return buffer; + } + EXPORT_SYMBOL(prom_firstprop); +@@ -210,9 +262,10 @@ EXPORT_SYMBOL(prom_firstprop); + */ + inline char *prom_nextprop(int node, const char *oprop, char *buffer) + { ++ unsigned long args[7]; + char buf[32]; + +- if(node == -1) { ++ if (node == -1) { + *buffer = 0; + return buffer; + } +@@ -220,10 +273,17 @@ inline char *prom_nextprop(int node, con + strcpy (buf, oprop); + oprop = buf; + } +- p1275_cmd ("nextprop", P1275_ARG(1,P1275_ARG_IN_STRING)| +- P1275_ARG(2,P1275_ARG_OUT_32B)| +- P1275_INOUT(3, 0), +- node, oprop, buffer); ++ ++ args[0] = (unsigned long) prom_nextprop_name; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = (unsigned long) oprop; ++ args[5] = (unsigned long) buffer; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ + return buffer; + } + EXPORT_SYMBOL(prom_nextprop); +@@ -231,12 +291,19 @@ EXPORT_SYMBOL(prom_nextprop); + int + prom_finddevice(const char *name) + { ++ unsigned long args[5]; ++ + if (!name) + return 0; +- return p1275_cmd(prom_finddev_name, +- P1275_ARG(0,P1275_ARG_IN_STRING)| +- P1275_INOUT(1, 1), +- name); ++ args[0] = (unsigned long) "finddevice"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned long) name; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[4]; + } + 
EXPORT_SYMBOL(prom_finddevice); + +@@ -247,7 +314,7 @@ int prom_node_has_property(int node, con + *buf = 0; + do { + prom_nextprop(node, buf, buf); +- if(!strcmp(buf, prop)) ++ if (!strcmp(buf, prop)) + return 1; + } while (*buf); + return 0; +@@ -260,6 +327,8 @@ EXPORT_SYMBOL(prom_node_has_property); + int + prom_setprop(int node, const char *pname, char *value, int size) + { ++ unsigned long args[8]; ++ + if (size == 0) + return 0; + if ((pname == 0) || (value == 0)) +@@ -271,19 +340,37 @@ prom_setprop(int node, const char *pname + return 0; + } + #endif +- return p1275_cmd ("setprop", P1275_ARG(1,P1275_ARG_IN_STRING)| +- P1275_ARG(2,P1275_ARG_IN_BUF)| +- P1275_INOUT(4, 1), +- node, pname, value, P1275_SIZE(size)); ++ args[0] = (unsigned long) "setprop"; ++ args[1] = 4; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = (unsigned long) pname; ++ args[5] = (unsigned long) value; ++ args[6] = size; ++ args[7] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[7]; + } + EXPORT_SYMBOL(prom_setprop); + + inline int prom_inst2pkg(int inst) + { ++ unsigned long args[5]; + int node; + +- node = p1275_cmd ("instance-to-package", P1275_INOUT(1, 1), inst); +- if (node == -1) return 0; ++ args[0] = (unsigned long) "instance-to-package"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned int) inst; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ node = (int) args[4]; ++ if (node == -1) ++ return 0; + return node; + } + +@@ -296,17 +383,28 @@ prom_pathtoinode(const char *path) + int node, inst; + + inst = prom_devopen (path); +- if (inst == 0) return 0; +- node = prom_inst2pkg (inst); +- prom_devclose (inst); +- if (node == -1) return 0; ++ if (inst == 0) ++ return 0; ++ node = prom_inst2pkg(inst); ++ prom_devclose(inst); ++ if (node == -1) ++ return 0; + return node; + } + + int prom_ihandle2path(int handle, char *buffer, int bufsize) + { +- return p1275_cmd("instance-to-path", +- P1275_ARG(1,P1275_ARG_OUT_BUF)| +- 
P1275_INOUT(3, 1), +- handle, buffer, P1275_SIZE(bufsize)); ++ unsigned long args[7]; ++ ++ args[0] = (unsigned long) "instance-to-path"; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) handle; ++ args[4] = (unsigned long) buffer; ++ args[5] = bufsize; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[6]; + } diff --git a/queue-2.6.35/tcp-combat-per-cpu-skew-in-orphan-tests.patch b/queue-2.6.35/tcp-combat-per-cpu-skew-in-orphan-tests.patch new file mode 100644 index 00000000000..cf3edff8ff2 --- /dev/null +++ b/queue-2.6.35/tcp-combat-per-cpu-skew-in-orphan-tests.patch @@ -0,0 +1,97 @@ +From 9e6ade922d7ff8240fa791f8b6fd50701b01998c Mon Sep 17 00:00:00 2001 +From: David S. Miller +Date: Wed, 25 Aug 2010 02:27:49 -0700 +Subject: tcp: Combat per-cpu skew in orphan tests. + + +From: David S. Miller + +[ Upstream commit ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 ] + +As reported by Anton Blanchard when we use +percpu_counter_read_positive() to make our orphan socket limit checks, +the check can be off by up to num_cpus_online() * batch (which is 32 +by default) which on a 128 cpu machine can be as large as the default +orphan limit itself. + +Fix this by doing the full expensive sum check if the optimized check +triggers. + +Reported-by: Anton Blanchard +Signed-off-by: David S. 
Miller +Acked-by: Eric Dumazet +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 18 ++++++++++++++---- + net/ipv4/tcp.c | 5 +---- + net/ipv4/tcp_timer.c | 8 ++++---- + 3 files changed, 19 insertions(+), 12 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -268,11 +268,21 @@ static inline int between(__u32 seq1, __ + return seq3 - seq2 >= seq1 - seq2; + } + +-static inline int tcp_too_many_orphans(struct sock *sk, int num) ++static inline bool tcp_too_many_orphans(struct sock *sk, int shift) + { +- return (num > sysctl_tcp_max_orphans) || +- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && +- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]); ++ struct percpu_counter *ocp = sk->sk_prot->orphan_count; ++ int orphans = percpu_counter_read_positive(ocp); ++ ++ if (orphans << shift > sysctl_tcp_max_orphans) { ++ orphans = percpu_counter_sum_positive(ocp); ++ if (orphans << shift > sysctl_tcp_max_orphans) ++ return true; ++ } ++ ++ if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && ++ atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) ++ return true; ++ return false; + } + + /* syncookies: remember time of last synqueue overflow */ +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2002,11 +2002,8 @@ adjudge_to_death: + } + } + if (sk->sk_state != TCP_CLOSE) { +- int orphan_count = percpu_counter_read_positive( +- sk->sk_prot->orphan_count); +- + sk_mem_reclaim(sk); +- if (tcp_too_many_orphans(sk, orphan_count)) { ++ if (tcp_too_many_orphans(sk, 0)) { + if (net_ratelimit()) + printk(KERN_INFO "TCP: too many of orphaned " + "sockets\n"); +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -67,18 +67,18 @@ static void tcp_write_err(struct sock *s + static int tcp_out_of_resources(struct sock *sk, int do_reset) + { + struct tcp_sock *tp = tcp_sk(sk); +- int orphans = percpu_counter_read_positive(&tcp_orphan_count); ++ int shift = 0; + + /* If peer does not open window for long time, or did not transmit + * anything for long time, penalize 
it. */ + if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) +- orphans <<= 1; ++ shift++; + + /* If some dubious ICMP arrived, penalize even more. */ + if (sk->sk_err_soft) +- orphans <<= 1; ++ shift++; + +- if (tcp_too_many_orphans(sk, orphans)) { ++ if (tcp_too_many_orphans(sk, shift)) { + if (net_ratelimit()) + printk(KERN_INFO "Out of socket memory\n"); + diff --git a/queue-2.6.35/tcp-fix-three-tcp-sysctls-tuning.patch b/queue-2.6.35/tcp-fix-three-tcp-sysctls-tuning.patch new file mode 100644 index 00000000000..ef98226d155 --- /dev/null +++ b/queue-2.6.35/tcp-fix-three-tcp-sysctls-tuning.patch @@ -0,0 +1,76 @@ +From 2b40c537e4e84747f7485b8cb13b06c0061c91c0 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 25 Aug 2010 23:02:17 -0700 +Subject: tcp: fix three tcp sysctls tuning + + +From: Eric Dumazet + +[ Upstream commit c5ed63d66f24fd4f7089b5a6e087b0ce7202aa8e ] + +As discovered by Anton Blanchard, current code to autotune +tcp_death_row.sysctl_max_tw_buckets, sysctl_tcp_max_orphans and +sysctl_max_syn_backlog makes little sense. + +The bigger a page is, the less tcp_max_orphans is : 4096 on a 512GB +machine in Anton's case. + +(tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)) +is much bigger if spinlock debugging is on. Its wrong to select bigger +limits in this case (where kernel structures are also bigger) + +bhash_size max is 65536, and we get this value even for small machines. + +A better ground is to use size of ehash table, this also makes code +shorter and more obvious. + +Based on a patch from Anton, and another from David. + +Reported-and-tested-by: Anton Blanchard +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 24 +++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3193,7 +3193,7 @@ void __init tcp_init(void) + { + struct sk_buff *skb = NULL; + unsigned long nr_pages, limit; +- int order, i, max_share; ++ int i, max_share, cnt; + unsigned long jiffy = jiffies; + + BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); +@@ -3242,22 +3242,12 @@ void __init tcp_init(void) + INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); + } + +- /* Try to be a bit smarter and adjust defaults depending +- * on available memory. +- */ +- for (order = 0; ((1 << order) << PAGE_SHIFT) < +- (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); +- order++) +- ; +- if (order >= 4) { +- tcp_death_row.sysctl_max_tw_buckets = 180000; +- sysctl_tcp_max_orphans = 4096 << (order - 4); +- sysctl_max_syn_backlog = 1024; +- } else if (order < 3) { +- tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); +- sysctl_tcp_max_orphans >>= (3 - order); +- sysctl_max_syn_backlog = 128; +- } ++ ++ cnt = tcp_hashinfo.ehash_mask + 1; ++ ++ tcp_death_row.sysctl_max_tw_buckets = cnt / 2; ++ sysctl_tcp_max_orphans = cnt / 2; ++ sysctl_max_syn_backlog = max(128, cnt / 256); + + /* Set the pressure threshold to be a fraction of global memory that + * is up to 1/2 at 256 MB, decreasing toward zero with the amount of diff --git a/queue-2.6.35/tcp-prevent-overzealous-packetization-by-sws-logic.patch b/queue-2.6.35/tcp-prevent-overzealous-packetization-by-sws-logic.patch new file mode 100644 index 00000000000..2e79dd30dd7 --- /dev/null +++ b/queue-2.6.35/tcp-prevent-overzealous-packetization-by-sws-logic.patch @@ -0,0 +1,55 @@ +From 945c01a93b3fea8a0d8a837fb98ff0ec6613207a Mon Sep 17 00:00:00 2001 +From: Alexey Kuznetsov +Date: Wed, 15 Sep 2010 10:27:52 -0700 +Subject: tcp: Prevent overzealous packetization by SWS logic. 
+ +From: Alexey Kuznetsov + +[ Upstream commit 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 ] + +If peer uses tiny MSS (say, 75 bytes) and similarly tiny advertised +window, the SWS logic will packetize to half the MSS unnecessarily. + +This causes problems with some embedded devices. + +However for large MSS devices we do want to half-MSS packetize +otherwise we never get enough packets into the pipe for things +like fast retransmit and recovery to work. + +Be careful also to handle the case where MSS > window, otherwise +we'll never send until the probe timer. + +Reported-by: ツ Leandro Melo de Sales +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -519,8 +519,22 @@ extern unsigned int tcp_current_mss(stru + /* Bound MSS / TSO packet size with the half of the window */ + static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) + { +- if (tp->max_window && pktsize > (tp->max_window >> 1)) +- return max(tp->max_window >> 1, 68U - tp->tcp_header_len); ++ int cutoff; ++ ++ /* When peer uses tiny windows, there is no use in packetizing ++ * to sub-MSS pieces for the sake of SWS or making sure there ++ * are enough packets in the pipe for fast recovery. ++ * ++ * On the other hand, for extremely large MSS devices, handling ++ * smaller than MSS windows in this way does make sense. 
++ */ ++ if (tp->max_window >= 512) ++ cutoff = (tp->max_window >> 1); ++ else ++ cutoff = tp->max_window; ++ ++ if (cutoff && pktsize > cutoff) ++ return max_t(int, cutoff, 68U - tp->tcp_header_len); + else + return pktsize; + } diff --git a/queue-2.6.35/tcp-select-writefds-don-t-hang-up-when-a-peer-close-connection.patch b/queue-2.6.35/tcp-select-writefds-don-t-hang-up-when-a-peer-close-connection.patch new file mode 100644 index 00000000000..aa598675213 --- /dev/null +++ b/queue-2.6.35/tcp-select-writefds-don-t-hang-up-when-a-peer-close-connection.patch @@ -0,0 +1,72 @@ +From e8387b3f5e5fc8ee54c8518d0e45cf3a338e81d6 Mon Sep 17 00:00:00 2001 +From: KOSAKI Motohiro +Date: Tue, 24 Aug 2010 16:05:48 +0000 +Subject: tcp: select(writefds) don't hang up when a peer close connection + + +From: KOSAKI Motohiro + +[ Upstream commit d84ba638e4ba3c40023ff997aa5e8d3ed002af36 ] + +This issue come from ruby language community. Below test program +hang up when only run on Linux. + + % uname -mrsv + Linux 2.6.26-2-486 #1 Sat Dec 26 08:37:39 UTC 2009 i686 + % ruby -rsocket -ve ' + BasicSocket.do_not_reverse_lookup = true + serv = TCPServer.open("127.0.0.1", 0) + s1 = TCPSocket.open("127.0.0.1", serv.addr[1]) + s2 = serv.accept + s2.close + s1.write("a") rescue p $! + s1.write("a") rescue p $! + Thread.new { + s1.write("a") + }.join' + ruby 1.9.3dev (2010-07-06 trunk 28554) [i686-linux] + # + [Hang Here] + +FreeBSD, Solaris, Mac doesn't. because Ruby's write() method call +select() internally. and tcp_poll has a bug. + +SUS defined 'ready for writing' of select() as following. + +| A descriptor shall be considered ready for writing when a call to an output +| function with O_NONBLOCK clear would not block, whether or not the function +| would transfer data successfully. + +That said, EPIPE situation is clearly one of 'ready for writing'. + +We don't have read-side issue because tcp_poll() already has read side +shutdown care. 
+ +| if (sk->sk_shutdown & RCV_SHUTDOWN) +| mask |= POLLIN | POLLRDNORM | POLLRDHUP; + +So, Let's insert same logic in write side. + +- reference url + http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31065 + http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31068 + +Signed-off-by: KOSAKI Motohiro +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -453,7 +453,8 @@ unsigned int tcp_poll(struct file *file, + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) + mask |= POLLOUT | POLLWRNORM; + } +- } ++ } else ++ mask |= POLLOUT | POLLWRNORM; + + if (tp->urg_data & TCP_URG_VALID) + mask |= POLLPRI; diff --git a/queue-2.6.35/udp-add-rehash-on-connect.patch b/queue-2.6.35/udp-add-rehash-on-connect.patch new file mode 100644 index 00000000000..08bb33daf7d --- /dev/null +++ b/queue-2.6.35/udp-add-rehash-on-connect.patch @@ -0,0 +1,200 @@ +From a499b2db4f2c7142a885e7090d2183795ecdf7d0 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 8 Sep 2010 05:08:44 +0000 +Subject: udp: add rehash on connect() + + +From: Eric Dumazet + +commit 719f835853a92f6090258114a72ffe41f09155cd upstream + +commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation) +added a secondary hash on UDP, hashed on (local addr, local port). + +Problem is that following sequence : + +fd = socket(...) +connect(fd, &remote, ...) + +not only selects remote end point (address and port), but also sets +local address, while UDP stack stored in secondary hash table the socket +while its local address was INADDR_ANY (or ipv6 equivalent) + +Sequence is : + - autobind() : choose a random local port, insert socket in hash tables + [while local address is INADDR_ANY] + - connect() : set remote address and port, change local address to IP + given by a route lookup. 
+ +When an incoming UDP frame comes, if more than 10 sockets are found in +primary hash table, we switch to secondary table, and fail to find +socket because its local address changed. + +One solution to this problem is to rehash datagram socket if needed. + +We add a new rehash(struct socket *) method in "struct proto", and +implement this method for UDP v4 & v6, using a common helper. + +This rehashing only takes care of secondary hash table, since primary +hash (based on local port only) is not changed. + +Reported-by: Krzysztof Piotr Oledzki +Signed-off-by: Eric Dumazet +Tested-by: Krzysztof Piotr Oledzki +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 1 + + include/net/udp.h | 1 + + net/ipv4/datagram.c | 5 ++++- + net/ipv4/udp.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ + net/ipv6/datagram.c | 7 ++++++- + net/ipv6/udp.c | 10 ++++++++++ + 6 files changed, 66 insertions(+), 2 deletions(-) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -749,6 +749,7 @@ struct proto { + /* Keeping track of sk's, looking them up, and port selection methods. 
*/ + void (*hash)(struct sock *sk); + void (*unhash)(struct sock *sk); ++ void (*rehash)(struct sock *sk); + int (*get_port)(struct sock *sk, unsigned short snum); + + /* Keeping track of sockets in use */ +--- a/include/net/udp.h ++++ b/include/net/udp.h +@@ -151,6 +151,7 @@ static inline void udp_lib_hash(struct s + } + + extern void udp_lib_unhash(struct sock *sk); ++extern void udp_lib_rehash(struct sock *sk, u16 new_hash); + + static inline void udp_lib_close(struct sock *sk, long timeout) + { +--- a/net/ipv4/datagram.c ++++ b/net/ipv4/datagram.c +@@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk + } + if (!inet->inet_saddr) + inet->inet_saddr = rt->rt_src; /* Update source address */ +- if (!inet->inet_rcv_saddr) ++ if (!inet->inet_rcv_saddr) { + inet->inet_rcv_saddr = rt->rt_src; ++ if (sk->sk_prot->rehash) ++ sk->sk_prot->rehash(sk); ++ } + inet->inet_daddr = rt->rt_dst; + inet->inet_dport = usin->sin_port; + sk->sk_state = TCP_ESTABLISHED; +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk) + } + EXPORT_SYMBOL(udp_lib_unhash); + ++/* ++ * inet_rcv_saddr was changed, we must rehash secondary hash ++ */ ++void udp_lib_rehash(struct sock *sk, u16 newhash) ++{ ++ if (sk_hashed(sk)) { ++ struct udp_table *udptable = sk->sk_prot->h.udp_table; ++ struct udp_hslot *hslot, *hslot2, *nhslot2; ++ ++ hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); ++ nhslot2 = udp_hashslot2(udptable, newhash); ++ udp_sk(sk)->udp_portaddr_hash = newhash; ++ if (hslot2 != nhslot2) { ++ hslot = udp_hashslot(udptable, sock_net(sk), ++ udp_sk(sk)->udp_port_hash); ++ /* we must lock primary chain too */ ++ spin_lock_bh(&hslot->lock); ++ ++ spin_lock(&hslot2->lock); ++ hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); ++ hslot2->count--; ++ spin_unlock(&hslot2->lock); ++ ++ spin_lock(&nhslot2->lock); ++ hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, ++ &nhslot2->head); ++ nhslot2->count++; ++ 
spin_unlock(&nhslot2->lock); ++ ++ spin_unlock_bh(&hslot->lock); ++ } ++ } ++} ++EXPORT_SYMBOL(udp_lib_rehash); ++ ++static void udp_v4_rehash(struct sock *sk) ++{ ++ u16 new_hash = udp4_portaddr_hash(sock_net(sk), ++ inet_sk(sk)->inet_rcv_saddr, ++ inet_sk(sk)->inet_num); ++ udp_lib_rehash(sk, new_hash); ++} ++ + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) + { + int rc; +@@ -1843,6 +1886,7 @@ struct proto udp_prot = { + .backlog_rcv = __udp_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, ++ .rehash = udp_v4_rehash, + .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, +--- a/net/ipv6/datagram.c ++++ b/net/ipv6/datagram.c +@@ -104,9 +104,12 @@ ipv4_connected: + if (ipv6_addr_any(&np->saddr)) + ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); + +- if (ipv6_addr_any(&np->rcv_saddr)) ++ if (ipv6_addr_any(&np->rcv_saddr)) { + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, + &np->rcv_saddr); ++ if (sk->sk_prot->rehash) ++ sk->sk_prot->rehash(sk); ++ } + + goto out; + } +@@ -191,6 +194,8 @@ ipv4_connected: + if (ipv6_addr_any(&np->rcv_saddr)) { + ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); + inet->inet_rcv_saddr = LOOPBACK4_IPV6; ++ if (sk->sk_prot->rehash) ++ sk->sk_prot->rehash(sk); + } + + ip6_dst_store(sk, dst, +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, uns + return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); + } + ++static void udp_v6_rehash(struct sock *sk) ++{ ++ u16 new_hash = udp6_portaddr_hash(sock_net(sk), ++ &inet6_sk(sk)->rcv_saddr, ++ inet_sk(sk)->inet_num); ++ ++ udp_lib_rehash(sk, new_hash); ++} ++ + static inline int compute_score(struct sock *sk, struct net *net, + unsigned short hnum, + struct in6_addr *saddr, __be16 sport, +@@ -1452,6 +1461,7 @@ struct proto udpv6_prot = { + .backlog_rcv = udpv6_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, ++ 
.rehash = udp_v6_rehash, + .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, diff --git a/queue-2.6.35/unix-do-not-loop-forever-at-unix_autobind.patch b/queue-2.6.35/unix-do-not-loop-forever-at-unix_autobind.patch new file mode 100644 index 00000000000..5f19a4fb245 --- /dev/null +++ b/queue-2.6.35/unix-do-not-loop-forever-at-unix_autobind.patch @@ -0,0 +1,70 @@ +From f08e075573ad91db08c6fbfd0d760a6adb713f00 Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa +Date: Sat, 4 Sep 2010 01:34:28 +0000 +Subject: UNIX: Do not loop forever at unix_autobind(). + + +From: Tetsuo Handa + +[ Upstream commit a9117426d0fcc05a194f728159a2d43df43c7add ] + +We assumed that unix_autobind() never fails if kzalloc() succeeded. +But unix_autobind() allows only 1048576 names. If /proc/sys/fs/file-max is +larger than 1048576 (e.g. systems with more than 10GB of RAM), a local user can +consume all names using fork()/socket()/bind(). + +If all names are in use, those who call bind() with addr_len == sizeof(short) +or connect()/sendmsg() with setsockopt(SO_PASSCRED) will continue + + while (1) + yield(); + +loop at unix_autobind() till a name becomes available. +This patch adds a loop counter in order to give up after 1048576 attempts. + +Calling yield() once per 256 attempts may not be sufficient when many names +are already in use, for __unix_find_socket_byname() can take a long time under +such circumstances. Therefore, this patch also adds a cond_resched() call. + +Note that currently a local user can consume 2GB of kernel memory if the user +is allowed to create and autobind 1048576 UNIX domain sockets. We should +consider adding some restriction for autobind operation. + +Signed-off-by: Tetsuo Handa +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -673,6 +673,7 @@ static int unix_autobind(struct socket * + static u32 ordernum = 1; + struct unix_address *addr; + int err; ++ unsigned int retries = 0; + + mutex_lock(&u->readlock); + +@@ -698,9 +699,17 @@ retry: + if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, + addr->hash)) { + spin_unlock(&unix_table_lock); +- /* Sanity yield. It is unusual case, but yet... */ +- if (!(ordernum&0xFF)) +- yield(); ++ /* ++ * __unix_find_socket_byname() may take long time if many names ++ * are already in use. ++ */ ++ cond_resched(); ++ /* Give up if all names seems to be in use. */ ++ if (retries++ == 0xFFFFF) { ++ err = -ENOSPC; ++ kfree(addr); ++ goto out; ++ } + goto retry; + } + addr->hash ^= sk->sk_type; diff --git a/queue-2.6.35/usb-musb_debugfs-don-t-use-the-struct-file-private_data-field-with-seq_files.patch b/queue-2.6.35/usb-musb_debugfs-don-t-use-the-struct-file-private_data-field-with-seq_files.patch new file mode 100644 index 00000000000..14f937098ee --- /dev/null +++ b/queue-2.6.35/usb-musb_debugfs-don-t-use-the-struct-file-private_data-field-with-seq_files.patch @@ -0,0 +1,40 @@ +From 024cfa5943a7e89565c60b612d698c2bfb3da66a Mon Sep 17 00:00:00 2001 +From: Mathias Nyman +Date: Mon, 6 Sep 2010 13:52:01 +0300 +Subject: usb: musb_debugfs: don't use the struct file private_data field with seq_files + +From: Mathias Nyman + +commit 024cfa5943a7e89565c60b612d698c2bfb3da66a upstream. + +seq_files use the private_data field of a file struct for storing a seq_file structure, +data should be stored in seq_file's own private field (e.g. file->private_data->private) +Otherwise seq_release() will free the private data when the file is closed. 
+ +Signed-off-by: Mathias Nyman +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/musb/musb_debugfs.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/usb/musb/musb_debugfs.c ++++ b/drivers/usb/musb/musb_debugfs.c +@@ -195,15 +195,14 @@ static const struct file_operations musb + + static int musb_test_mode_open(struct inode *inode, struct file *file) + { +- file->private_data = inode->i_private; +- + return single_open(file, musb_test_mode_show, inode->i_private); + } + + static ssize_t musb_test_mode_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) + { +- struct musb *musb = file->private_data; ++ struct seq_file *s = file->private_data; ++ struct musb *musb = s->private; + u8 test = 0; + char buf[18]; + diff --git a/queue-2.6.35/usb-serial-mos-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.35/usb-serial-mos-prevent-reading-uninitialized-stack-memory.patch new file mode 100644 index 00000000000..55bc92cb68c --- /dev/null +++ b/queue-2.6.35/usb-serial-mos-prevent-reading-uninitialized-stack-memory.patch @@ -0,0 +1,47 @@ +From a0846f1868b11cd827bdfeaf4527d8b1b1c0b098 Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg +Date: Wed, 15 Sep 2010 17:44:16 -0400 +Subject: USB: serial/mos*: prevent reading uninitialized stack memory + +From: Dan Rosenberg + +commit a0846f1868b11cd827bdfeaf4527d8b1b1c0b098 upstream. + +The TIOCGICOUNT device ioctl in both mos7720.c and mos7840.c allows +unprivileged users to read uninitialized stack memory, because the +"reserved" member of the serial_icounter_struct struct declared on the +stack is not altered or zeroed before being copied back to the user. +This patch takes care of it. 
+ +Signed-off-by: Dan Rosenberg +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/serial/mos7720.c | 3 +++ + drivers/usb/serial/mos7840.c | 3 +++ + 2 files changed, 6 insertions(+) + +--- a/drivers/usb/serial/mos7720.c ++++ b/drivers/usb/serial/mos7720.c +@@ -2024,6 +2024,9 @@ static int mos7720_ioctl(struct tty_stru + + case TIOCGICOUNT: + cnow = mos7720_port->icount; ++ ++ memset(&icount, 0, sizeof(struct serial_icounter_struct)); ++ + icount.cts = cnow.cts; + icount.dsr = cnow.dsr; + icount.rng = cnow.rng; +--- a/drivers/usb/serial/mos7840.c ++++ b/drivers/usb/serial/mos7840.c +@@ -2285,6 +2285,9 @@ static int mos7840_ioctl(struct tty_stru + case TIOCGICOUNT: + cnow = mos7840_port->icount; + smp_rmb(); ++ ++ memset(&icount, 0, sizeof(struct serial_icounter_struct)); ++ + icount.cts = cnow.cts; + icount.dsr = cnow.dsr; + icount.rng = cnow.rng;