From: Greg Kroah-Hartman Date: Wed, 22 Sep 2010 19:57:47 +0000 (-0700) Subject: .32 patches X-Git-Tag: v2.6.35.6~18 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fc0485aeb17c0054ffb1860d283c79d707399388;p=thirdparty%2Fkernel%2Fstable-queue.git .32 patches --- diff --git a/queue-2.6.32/bonding-correctly-process-non-linear-skbs.patch b/queue-2.6.32/bonding-correctly-process-non-linear-skbs.patch new file mode 100644 index 00000000000..3afb657288b --- /dev/null +++ b/queue-2.6.32/bonding-correctly-process-non-linear-skbs.patch @@ -0,0 +1,59 @@ +From ab12811c89e88f2e66746790b1fe4469ccb7bdd9 Mon Sep 17 00:00:00 2001 +From: Andy Gospodarek +Date: Fri, 10 Sep 2010 11:43:20 +0000 +Subject: bonding: correctly process non-linear skbs + +From: Andy Gospodarek + +commit ab12811c89e88f2e66746790b1fe4469ccb7bdd9 upstream. + +It was recently brought to my attention that 802.3ad mode bonds would no +longer form when using some network hardware after a driver update. +After snooping around I realized that the particular hardware was using +page-based skbs and found that skb->data did not contain a valid LACPDU +as it was not stored there. That explained the inability to form an +802.3ad-based bond. For balance-alb mode bonds this was also an issue +as ARPs would not be properly processed. + +This patch fixes the issue in my tests and should be applied to 2.6.36 +and as far back as anyone cares to add it to stable. + +Thanks to Alexander Duyck and Jesse +Brandeburg for the suggestions on this one. + +Signed-off-by: Andy Gospodarek +CC: Alexander Duyck +CC: Jesse Brandeburg +Signed-off-by: Jay Vosburgh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/bonding/bond_3ad.c | 3 +++ + drivers/net/bonding/bond_alb.c | 3 +++ + 2 files changed, 6 insertions(+) + +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -2451,6 +2451,9 @@ int bond_3ad_lacpdu_recv(struct sk_buff + if (!(dev->flags & IFF_MASTER)) + goto out; + ++ if (!pskb_may_pull(skb, sizeof(struct lacpdu))) ++ goto out; ++ + read_lock(&bond->lock); + slave = bond_get_slave_by_dev((struct bonding *)netdev_priv(dev), + orig_dev); +--- a/drivers/net/bonding/bond_alb.c ++++ b/drivers/net/bonding/bond_alb.c +@@ -370,6 +370,9 @@ static int rlb_arp_recv(struct sk_buff * + goto out; + } + ++ if (!pskb_may_pull(skb, arp_hdr_len(bond_dev))) ++ goto out; ++ + if (skb->len < sizeof(struct arp_pkt)) { + pr_debug("Packet is too small to be an ARP\n"); + goto out; diff --git a/queue-2.6.32/bridge-clear-inet-control-block-of-skbs-passed-into-ip_fragment.patch b/queue-2.6.32/bridge-clear-inet-control-block-of-skbs-passed-into-ip_fragment.patch new file mode 100644 index 00000000000..45650272659 --- /dev/null +++ b/queue-2.6.32/bridge-clear-inet-control-block-of-skbs-passed-into-ip_fragment.patch @@ -0,0 +1,41 @@ +From 3bcae642028b1e041d8ece56a681efceacf9065f Mon Sep 17 00:00:00 2001 +From: David S. Miller +Date: Sun, 19 Sep 2010 21:45:29 -0700 +Subject: bridge: Clear INET control block of SKBs passed into ip_fragment(). + + +From: David S. Miller + +[ Upstream commit 4ce6b9e1621c187a32a47a17bf6be93b1dc4a3df ] + +In a similar vein to commit 17762060c25590bfddd68cc1131f28ec720f405f +("bridge: Clear IPCB before possible entry into IP stack") + +Any time we call into the IP stack we have to make sure the state +there is as expected by the ipv4 code. + +With help from Eric Dumazet and Herbert Xu. + +Reported-by: Brandan Das +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_netfilter.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/bridge/br_netfilter.c ++++ b/net/bridge/br_netfilter.c +@@ -800,9 +800,11 @@ static int br_nf_dev_queue_xmit(struct s + if (skb->nfct != NULL && + (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) && + skb->len > skb->dev->mtu && +- !skb_is_gso(skb)) ++ !skb_is_gso(skb)) { ++ /* BUG: Should really parse the IP options here. */ ++ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + return ip_fragment(skb, br_dev_queue_push_xmit); +- else ++ } else + return br_dev_queue_push_xmit(skb); + } + #else diff --git a/queue-2.6.32/bridge-clear-ipcb-before-possible-entry-into-ip-stack.patch b/queue-2.6.32/bridge-clear-ipcb-before-possible-entry-into-ip-stack.patch new file mode 100644 index 00000000000..297ddb19ce6 --- /dev/null +++ b/queue-2.6.32/bridge-clear-ipcb-before-possible-entry-into-ip-stack.patch @@ -0,0 +1,38 @@ +From 7562ef435a00b50b3764b137b22eb2e883289ea3 Mon Sep 17 00:00:00 2001 +From: Herbert Xu +Date: Mon, 5 Jul 2010 21:29:28 +0000 +Subject: bridge: Clear IPCB before possible entry into IP stack + + +From: Herbert Xu + +[ Upstream commit 17762060c25590bfddd68cc1131f28ec720f405f ] + +The bridge protocol lives dangerously by having incestuous relations +with the IP stack. In this instance an abomination has been created +where a bogus IPCB area from a bridged packet leads to a crash in +the IP stack because it's interpreted as IP options. + +This patch papers over the problem by clearing the IPCB area in that +particular spot. To fix this properly we'd also need to parse any +IP options if present but I'm way too lazy for that. + +Signed-off-by: Herbert Xu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_netfilter.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/bridge/br_netfilter.c ++++ b/net/bridge/br_netfilter.c +@@ -600,6 +600,9 @@ static unsigned int br_nf_pre_routing(un + + pskb_trim_rcsum(skb, len); + ++ /* BUG: Should really parse the IP options here. */ ++ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); ++ + nf_bridge_put(skb->nf_bridge); + if (!nf_bridge_alloc(skb)) + return NF_DROP; diff --git a/queue-2.6.32/drivers-net-cxgb3-cxgb3_main.c-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.32/drivers-net-cxgb3-cxgb3_main.c-prevent-reading-uninitialized-stack-memory.patch new file mode 100644 index 00000000000..e0bf9daa297 --- /dev/null +++ b/queue-2.6.32/drivers-net-cxgb3-cxgb3_main.c-prevent-reading-uninitialized-stack-memory.patch @@ -0,0 +1,36 @@ +From 49c37c0334a9b85d30ab3d6b5d1acb05ef2ef6de Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg +Date: Wed, 15 Sep 2010 11:43:12 +0000 +Subject: drivers/net/cxgb3/cxgb3_main.c: prevent reading uninitialized stack memory + +From: Dan Rosenberg + +commit 49c37c0334a9b85d30ab3d6b5d1acb05ef2ef6de upstream. + +Fixed formatting (tabs and line breaks). + +The CHELSIO_GET_QSET_NUM device ioctl allows unprivileged users to read +4 bytes of uninitialized stack memory, because the "addr" member of the +ch_reg struct declared on the stack in cxgb_extension_ioctl() is not +altered or zeroed before being copied back to the user. This patch +takes care of it. + +Signed-off-by: Dan Rosenberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/cxgb3/cxgb3_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/cxgb3/cxgb3_main.c ++++ b/drivers/net/cxgb3/cxgb3_main.c +@@ -2274,6 +2274,8 @@ static int cxgb_extension_ioctl(struct n + case CHELSIO_GET_QSET_NUM:{ + struct ch_reg edata; + ++ memset(&edata, 0, sizeof(struct ch_reg)); ++ + edata.cmd = CHELSIO_GET_QSET_NUM; + edata.val = pi->nqsets; + if (copy_to_user(useraddr, &edata, sizeof(edata))) diff --git a/queue-2.6.32/drivers-net-eql.c-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.32/drivers-net-eql.c-prevent-reading-uninitialized-stack-memory.patch new file mode 100644 index 00000000000..ca7ea26f9c0 --- /dev/null +++ b/queue-2.6.32/drivers-net-eql.c-prevent-reading-uninitialized-stack-memory.patch @@ -0,0 +1,36 @@ +From 44467187dc22fdd33a1a06ea0ba86ce20be3fe3c Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg +Date: Wed, 15 Sep 2010 11:43:04 +0000 +Subject: drivers/net/eql.c: prevent reading uninitialized stack memory + +From: Dan Rosenberg + +commit 44467187dc22fdd33a1a06ea0ba86ce20be3fe3c upstream. + +Fixed formatting (tabs and line breaks). + +The EQL_GETMASTRCFG device ioctl allows unprivileged users to read 16 +bytes of uninitialized stack memory, because the "master_name" member of +the master_config_t struct declared on the stack in eql_g_master_cfg() +is not altered or zeroed before being copied back to the user. This +patch takes care of it. + +Signed-off-by: Dan Rosenberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/eql.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/eql.c ++++ b/drivers/net/eql.c +@@ -554,6 +554,8 @@ static int eql_g_master_cfg(struct net_d + equalizer_t *eql; + master_config_t mc; + ++ memset(&mc, 0, sizeof(master_config_t)); ++ + if (eql_is_master(dev)) { + eql = netdev_priv(dev); + mc.max_slaves = eql->max_slaves; diff --git a/queue-2.6.32/drivers-net-usb-hso.c-prevent-reading-uninitialized-memory.patch b/queue-2.6.32/drivers-net-usb-hso.c-prevent-reading-uninitialized-memory.patch new file mode 100644 index 00000000000..0eb64bdaff4 --- /dev/null +++ b/queue-2.6.32/drivers-net-usb-hso.c-prevent-reading-uninitialized-memory.patch @@ -0,0 +1,36 @@ +From 7011e660938fc44ed86319c18a5954e95a82ab3e Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg +Date: Wed, 15 Sep 2010 11:43:28 +0000 +Subject: drivers/net/usb/hso.c: prevent reading uninitialized memory + +From: Dan Rosenberg + +commit 7011e660938fc44ed86319c18a5954e95a82ab3e upstream. + +Fixed formatting (tabs and line breaks). + +The TIOCGICOUNT device ioctl allows unprivileged users to read +uninitialized stack memory, because the "reserved" member of the +serial_icounter_struct struct declared on the stack in hso_get_count() +is not altered or zeroed before being copied back to the user. This +patch takes care of it. + +Signed-off-by: Dan Rosenberg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/usb/hso.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/usb/hso.c ++++ b/drivers/net/usb/hso.c +@@ -1634,6 +1634,8 @@ static int hso_get_count(struct hso_seri + struct uart_icount cnow; + struct hso_tiocmget *tiocmget = serial->tiocmget; + ++ memset(&icount, 0, sizeof(struct serial_icounter_struct)); ++ + if (!tiocmget) + return -ENOENT; + spin_lock_irq(&serial->serial_lock); diff --git a/queue-2.6.32/gro-fix-different-skb-headrooms.patch b/queue-2.6.32/gro-fix-different-skb-headrooms.patch new file mode 100644 index 00000000000..69363d94f9b --- /dev/null +++ b/queue-2.6.32/gro-fix-different-skb-headrooms.patch @@ -0,0 +1,68 @@ +From d06a1f68281f7235a4d54fd142685bbd1f3f901d Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 1 Sep 2010 00:50:51 +0000 +Subject: gro: fix different skb headrooms + + +From: Eric Dumazet + +[ Upstream commit 3d3be4333fdf6faa080947b331a6a19bce1a4f57 ] + +Packets entering GRO might have different headrooms, even for a given +flow (because of implementation details in drivers, like copybreak). +We cant force drivers to deliver packets with a fixed headroom. + +1) fix skb_segment() + +skb_segment() makes the false assumption headrooms of fragments are same +than the head. When CHECKSUM_PARTIAL is used, this can give csum_start +errors, and crash later in skb_copy_and_csum_dev() + +2) allocate a minimal skb for head of frag_list + +skb_gro_receive() uses netdev_alloc_skb(headroom + skb_gro_offset(p)) to +allocate a fresh skb. This adds NET_SKB_PAD to a padding already +provided by netdevice, depending on various things, like copybreak. + +Use alloc_skb() to allocate an exact padding, to reduce cache line +needs: +NET_SKB_PAD + NET_IP_ALIGN + +bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=16626 + +Many thanks to Plamen Petrov, testing many debugging patches ! +With help of Jarek Poplawski. 
+ +Reported-by: Plamen Petrov +Signed-off-by: Eric Dumazet +CC: Jarek Poplawski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -2575,6 +2575,10 @@ struct sk_buff *skb_segment(struct sk_bu + __copy_skb_header(nskb, skb); + nskb->mac_len = skb->mac_len; + ++ /* nskb and skb might have different headroom */ ++ if (nskb->ip_summed == CHECKSUM_PARTIAL) ++ nskb->csum_start += skb_headroom(nskb) - headroom; ++ + skb_reset_mac_header(nskb); + skb_set_network_header(nskb, skb->mac_len); + nskb->transport_header = (nskb->network_header + +@@ -2704,8 +2708,8 @@ int skb_gro_receive(struct sk_buff **hea + } else if (skb_gro_len(p) != pinfo->gso_size) + return -E2BIG; + +- headroom = skb_headroom(p); +- nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); ++ headroom = NET_SKB_PAD + NET_IP_ALIGN; ++ nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); + if (unlikely(!nskb)) + return -ENOMEM; + diff --git a/queue-2.6.32/gro-re-fix-different-skb-headrooms.patch b/queue-2.6.32/gro-re-fix-different-skb-headrooms.patch new file mode 100644 index 00000000000..4225fae6756 --- /dev/null +++ b/queue-2.6.32/gro-re-fix-different-skb-headrooms.patch @@ -0,0 +1,44 @@ +From a04f025614278fe1c35072aba21a84d9ea9842f2 Mon Sep 17 00:00:00 2001 +From: Jarek Poplawski +Date: Sat, 4 Sep 2010 10:34:29 +0000 +Subject: gro: Re-fix different skb headrooms + + +From: Jarek Poplawski + +[ Upstream commit 64289c8e6851bca0e589e064c9a5c9fbd6ae5dd4 ] + +The patch: "gro: fix different skb headrooms" in its part: +"2) allocate a minimal skb for head of frag_list" is buggy. The copied +skb has p->data set at the ip header at the moment, and skb_gro_offset +is the length of ip + tcp headers. So, after the change the length of +mac header is skipped. 
Later skb_set_mac_header() sets it into the +NET_SKB_PAD area (if it's long enough) and ip header is misaligned at +NET_SKB_PAD + NET_IP_ALIGN offset. There is no reason to assume the +original skb was wrongly allocated, so let's copy it as it was. + +bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=16626 +fixes commit: 3d3be4333fdf6faa080947b331a6a19bce1a4f57 + +Reported-by: Plamen Petrov +Signed-off-by: Jarek Poplawski +CC: Eric Dumazet +Acked-by: Eric Dumazet +Tested-by: Plamen Petrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -2708,7 +2708,7 @@ int skb_gro_receive(struct sk_buff **hea + } else if (skb_gro_len(p) != pinfo->gso_size) + return -E2BIG; + +- headroom = NET_SKB_PAD + NET_IP_ALIGN; ++ headroom = skb_headroom(p); + nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); + if (unlikely(!nskb)) + return -ENOMEM; diff --git a/queue-2.6.32/irda-correctly-clean-up-self-ias_obj-on-irda_bind-failure.patch b/queue-2.6.32/irda-correctly-clean-up-self-ias_obj-on-irda_bind-failure.patch new file mode 100644 index 00000000000..03a161edbc6 --- /dev/null +++ b/queue-2.6.32/irda-correctly-clean-up-self-ias_obj-on-irda_bind-failure.patch @@ -0,0 +1,39 @@ +From 5ff28ffd3cba8ab8129841763271dd1401721e7a Mon Sep 17 00:00:00 2001 +From: David S. Miller +Date: Sun, 19 Sep 2010 17:56:19 -0700 +Subject: irda: Correctly clean up self->ias_obj on irda_bind() failure. + + +From: David S. Miller + +[ Upstream commit 628e300cccaa628d8fb92aa28cb7530a3d5f2257 ] + +If irda_open_tsap() fails, the irda_bind() code tries to destroy +the ->ias_obj object by hand, but does so wrongly. + +In particular, it fails to a) release the hashbin attached to the +object and b) reset the self->ias_obj pointer to NULL. 
+ +Fix both problems by using irias_delete_object() and explicitly +setting self->ias_obj to NULL, just as irda_release() does. + +Reported-by: Tavis Ormandy +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/irda/af_irda.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/irda/af_irda.c ++++ b/net/irda/af_irda.c +@@ -810,8 +810,8 @@ static int irda_bind(struct socket *sock + + err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name); + if (err < 0) { +- kfree(self->ias_obj->name); +- kfree(self->ias_obj); ++ irias_delete_object(self->ias_obj); ++ self->ias_obj = NULL; + return err; + } + diff --git a/queue-2.6.32/net-fix-oops-from-tcp_collapse-when-using-splice.patch b/queue-2.6.32/net-fix-oops-from-tcp_collapse-when-using-splice.patch new file mode 100644 index 00000000000..e8b04267488 --- /dev/null +++ b/queue-2.6.32/net-fix-oops-from-tcp_collapse-when-using-splice.patch @@ -0,0 +1,34 @@ +From 99537bdb8d10c5030437f1a10c35c9d2dd272200 Mon Sep 17 00:00:00 2001 +From: Steven J. Magnani +Date: Tue, 30 Mar 2010 13:56:01 -0700 +Subject: net: Fix oops from tcp_collapse() when using splice() + + +From: Steven J. Magnani + +[ Upstream commit baff42ab1494528907bf4d5870359e31711746ae ] + +tcp_read_sock() can eat skbs without immediately advancing copied_seq. +This can cause a panic in tcp_collapse() if it is called as a result +of the recv_actor dropping the socket lock. + +A userspace program that splices data from a socket to either another +socket or to a file can trigger this bug. + +Signed-off-by: Steven J. Magnani +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1335,6 +1335,7 @@ int tcp_read_sock(struct sock *sk, read_ + sk_eat_skb(sk, skb, 0); + if (!desc->count) + break; ++ tp->copied_seq = seq; + } + tp->copied_seq = seq; + diff --git a/queue-2.6.32/r8169-fix-mdio_read-and-update-mdio_write-according-to-hw-specs.patch b/queue-2.6.32/r8169-fix-mdio_read-and-update-mdio_write-according-to-hw-specs.patch new file mode 100644 index 00000000000..e5c852ff449 --- /dev/null +++ b/queue-2.6.32/r8169-fix-mdio_read-and-update-mdio_write-according-to-hw-specs.patch @@ -0,0 +1,51 @@ +From c89e41936cca2f734bd07eb3892a302b98cddd7a Mon Sep 17 00:00:00 2001 +From: Timo Teräs +Date: Wed, 9 Jun 2010 17:31:48 -0700 +Subject: r8169: fix mdio_read and update mdio_write according to hw specs + + +From: Timo Teräs + +[ Upstream commit 81a95f049962ec20a9aed888e676208b206f0f2e ] + +Realtek confirmed that a 20us delay is needed after mdio_read and +mdio_write operations. Reduce the delay in mdio_write, and add it +to mdio_read too. Also add a comment that the 20us is from hw specs. + +Signed-off-by: Timo Teräs +Acked-by: Francois Romieu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/r8169.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/drivers/net/r8169.c ++++ b/drivers/net/r8169.c +@@ -558,10 +558,10 @@ static void mdio_write(void __iomem *ioa + udelay(25); + } + /* +- * Some configurations require a small delay even after the write +- * completed indication or the next write might fail. ++ * According to hardware specs a 20us delay is required after write ++ * complete indication, but before sending next command. 
+ */ +- udelay(25); ++ udelay(20); + } + + static int mdio_read(void __iomem *ioaddr, int reg_addr) +@@ -581,6 +581,12 @@ static int mdio_read(void __iomem *ioadd + } + udelay(25); + } ++ /* ++ * According to hardware specs a 20us delay is required after read ++ * complete indication, but before sending next command. ++ */ ++ udelay(20); ++ + return value; + } + diff --git a/queue-2.6.32/r8169-fix-random-mdio_write-failures.patch b/queue-2.6.32/r8169-fix-random-mdio_write-failures.patch new file mode 100644 index 00000000000..4c465ec836b --- /dev/null +++ b/queue-2.6.32/r8169-fix-random-mdio_write-failures.patch @@ -0,0 +1,46 @@ +From d64d77392ecea10af61dfae72948c27318aec296 Mon Sep 17 00:00:00 2001 +From: Timo Teräs +Date: Sun, 6 Jun 2010 15:38:47 -0700 +Subject: r8169: fix random mdio_write failures + + +From: Timo Teräs + +[ Upstream commit 024a07bacf8287a6ddfa83e9d5b951c5e8b4070e ] + +Some configurations need delay between the "write completed" indication +and new write to work reliably. + +Realtek driver seems to use longer delay when polling the "write complete" +bit, so it waits long enough between writes with high probability (but +could probably break too). This patch adds a new udelay to make sure we +wait unconditionally some time after the write complete indication. + +This caused a regression with XID 18000000 boards when the board specific +phy configuration writing many mdio registers was added in commit +2e955856ff (r8169: phy init for the 8169scd). Some of the configuration +mdio writes would almost always fail, and depending on failure might leave +the PHY in non-working state. + +Signed-off-by: Timo Teräs +Acked-off-by: Francois Romieu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/r8169.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/r8169.c ++++ b/drivers/net/r8169.c +@@ -557,6 +557,11 @@ static void mdio_write(void __iomem *ioa + break; + udelay(25); + } ++ /* ++ * Some configurations require a small delay even after the write ++ * completed indication or the next write might fail. ++ */ ++ udelay(25); + } + + static int mdio_read(void __iomem *ioaddr, int reg_addr) diff --git a/queue-2.6.32/rds-fix-a-leak-of-kernel-memory.patch b/queue-2.6.32/rds-fix-a-leak-of-kernel-memory.patch new file mode 100644 index 00000000000..545d66d21dd --- /dev/null +++ b/queue-2.6.32/rds-fix-a-leak-of-kernel-memory.patch @@ -0,0 +1,32 @@ +From 31199048693cdc7720cfb86429ffa9b5d3bfa671 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 16 Aug 2010 03:25:00 +0000 +Subject: rds: fix a leak of kernel memory + + +From: Eric Dumazet + +[ Upstream commit f037590fff3005ce8a1513858d7d44f50053cc8f ] + +struct rds_rdma_notify contains a 32 bits hole on 64bit arches, +make sure it is zeroed before copying it to user. + +Signed-off-by: Eric Dumazet +CC: Andy Grover +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rds/recv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/rds/recv.c ++++ b/net/rds/recv.c +@@ -296,7 +296,7 @@ static int rds_still_queued(struct rds_s + int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) + { + struct rds_notifier *notifier; +- struct rds_rdma_notify cmsg; ++ struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */ + unsigned int count = 0, max_messages = ~0U; + unsigned long flags; + LIST_HEAD(copy); diff --git a/queue-2.6.32/sparc-provide-io-read-write-16-32-be.patch b/queue-2.6.32/sparc-provide-io-read-write-16-32-be.patch new file mode 100644 index 00000000000..6916a1ea014 --- /dev/null +++ b/queue-2.6.32/sparc-provide-io-read-write-16-32-be.patch @@ -0,0 +1,50 @@ +From 524954b9654784b232a5fedf5ab0713964d24e27 Mon Sep 17 00:00:00 2001 +From: David S. Miller +Date: Wed, 3 Mar 2010 02:30:37 -0800 +Subject: sparc: Provide io{read,write}{16,32}be(). + +From: David S. Miller + +[ Upstream commit 1bff4dbb79a2bc0ee4881c8ea6a4fbed64ea6309 ] + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/io_32.h | 4 ++++ + arch/sparc/include/asm/io_64.h | 4 ++++ + 2 files changed, 8 insertions(+) + +--- a/arch/sparc/include/asm/io_32.h ++++ b/arch/sparc/include/asm/io_32.h +@@ -249,10 +249,14 @@ extern void iounmap(volatile void __iome + + #define ioread8(X) readb(X) + #define ioread16(X) readw(X) ++#define ioread16be(X) __raw_readw(X) + #define ioread32(X) readl(X) ++#define ioread32be(X) __raw_readl(X) + #define iowrite8(val,X) writeb(val,X) + #define iowrite16(val,X) writew(val,X) ++#define iowrite16be(val,X) __raw_writew(val,X) + #define iowrite32(val,X) writel(val,X) ++#define iowrite32be(val,X) __raw_writel(val,X) + + static inline void ioread8_rep(void __iomem *port, void *buf, unsigned long count) + { +--- a/arch/sparc/include/asm/io_64.h ++++ b/arch/sparc/include/asm/io_64.h +@@ -468,10 +468,14 @@ static inline void iounmap(volatile void + + #define ioread8(X) readb(X) + #define ioread16(X) readw(X) ++#define ioread16be(X) __raw_readw(X) + #define ioread32(X) readl(X) ++#define ioread32be(X) __raw_readl(X) + #define iowrite8(val,X) writeb(val,X) + #define iowrite16(val,X) writew(val,X) ++#define iowrite16be(val,X) __raw_writew(val,X) + #define iowrite32(val,X) writel(val,X) ++#define iowrite32be(val,X) __raw_writel(val,X) + + /* Create a virtual mapping cookie for an IO port range */ + extern void __iomem *ioport_map(unsigned long port, unsigned int nr); diff --git a/queue-2.6.32/sparc64-get-rid-of-indirect-p1275-prom-call-buffer.patch b/queue-2.6.32/sparc64-get-rid-of-indirect-p1275-prom-call-buffer.patch new file mode 100644 index 00000000000..39c871af546 --- /dev/null +++ b/queue-2.6.32/sparc64-get-rid-of-indirect-p1275-prom-call-buffer.patch @@ -0,0 +1,1234 @@ +From 2a6515d8d708846591ef0c4bce0e226f75aa01a7 Mon Sep 17 00:00:00 2001 +From: David S. Miller +Date: Sun, 19 Sep 2010 17:50:44 -0700 +Subject: sparc64: Get rid of indirect p1275 PROM call buffer. + +From: David S. 
Miller + +[ Upstream commit 25edd6946a1d74e5e77813c2324a0908c68bcf9e ] + +This is based upon a report by Meelis Roos showing that it's possible +that we'll try to fetch a property that is 32K in size with some +devices. With the current fixed 3K buffer we use for moving data in +and out of the firmware during PROM calls, that simply won't work. + +In fact, it will scramble random kernel data during bootup. + +The reasoning behind the temporary buffer is entirely historical. It +used to be the case that we had problems referencing dynamic kernel +memory (including the stack) early in the boot process before we +explicitly told the firmware to switch us over to the kernel trap +table. + +So what we did was always give the firmware buffers that were locked +into the main kernel image. + +But we no longer have problems like that, so get rid of all of this +indirect bounce buffering. + +Besides fixing Meelis's bug, this also makes the kernel data about 3K +smaller. + +It was also discovered during these conversions that the +implementation of prom_retain() was completely wrong, so that was +fixed here as well. Currently that interface is not in use. + +Reported-by: Meelis Roos +Tested-by: Meelis Roos +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/oplib_64.h | 27 --- + arch/sparc/prom/cif.S | 16 - + arch/sparc/prom/console_64.c | 48 ++++- + arch/sparc/prom/devops_64.c | 36 +++- + arch/sparc/prom/misc_64.c | 314 ++++++++++++++++++++++++++------------ + arch/sparc/prom/p1275.c | 102 ------------ + arch/sparc/prom/tree_64.c | 210 ++++++++++++++++++------- + 7 files changed, 456 insertions(+), 297 deletions(-) + +--- a/arch/sparc/include/asm/oplib_64.h ++++ b/arch/sparc/include/asm/oplib_64.h +@@ -185,9 +185,8 @@ extern int prom_getunumber(int syndrome_ + char *buf, int buflen); + + /* Retain physical memory to the caller across soft resets. 
*/ +-extern unsigned long prom_retain(const char *name, +- unsigned long pa_low, unsigned long pa_high, +- long size, long align); ++extern int prom_retain(const char *name, unsigned long size, ++ unsigned long align, unsigned long *paddr); + + /* Load explicit I/D TLB entries into the calling processor. */ + extern long prom_itlb_load(unsigned long index, +@@ -287,26 +286,6 @@ extern void prom_sun4v_guest_soft_state( + extern int prom_ihandle2path(int handle, char *buffer, int bufsize); + + /* Client interface level routines. */ +-extern long p1275_cmd(const char *, long, ...); +- +-#if 0 +-#define P1275_SIZE(x) ((((long)((x) / 32)) << 32) | (x)) +-#else +-#define P1275_SIZE(x) x +-#endif +- +-/* We support at most 16 input and 1 output argument */ +-#define P1275_ARG_NUMBER 0 +-#define P1275_ARG_IN_STRING 1 +-#define P1275_ARG_OUT_BUF 2 +-#define P1275_ARG_OUT_32B 3 +-#define P1275_ARG_IN_FUNCTION 4 +-#define P1275_ARG_IN_BUF 5 +-#define P1275_ARG_IN_64B 6 +- +-#define P1275_IN(x) ((x) & 0xf) +-#define P1275_OUT(x) (((x) << 4) & 0xf0) +-#define P1275_INOUT(i,o) (P1275_IN(i)|P1275_OUT(o)) +-#define P1275_ARG(n,x) ((x) << ((n)*3 + 8)) ++extern void p1275_cmd_direct(unsigned long *); + + #endif /* !(__SPARC64_OPLIB_H) */ +--- a/arch/sparc/prom/cif.S ++++ b/arch/sparc/prom/cif.S +@@ -9,18 +9,18 @@ + #include + + .text +- .globl prom_cif_interface +-prom_cif_interface: +- sethi %hi(p1275buf), %o0 +- or %o0, %lo(p1275buf), %o0 +- ldx [%o0 + 0x010], %o1 ! prom_cif_stack +- save %o1, -192, %sp +- ldx [%i0 + 0x008], %l2 ! prom_cif_handler ++ .globl prom_cif_direct ++prom_cif_direct: ++ sethi %hi(p1275buf), %o1 ++ or %o1, %lo(p1275buf), %o1 ++ ldx [%o1 + 0x0010], %o2 ! prom_cif_stack ++ save %o2, -192, %sp ++ ldx [%i1 + 0x0008], %l2 ! prom_cif_handler + mov %g4, %l0 + mov %g5, %l1 + mov %g6, %l3 + call %l2 +- add %i0, 0x018, %o0 ! prom_args ++ mov %i0, %o0 ! 
prom_args + mov %l0, %g4 + mov %l1, %g5 + mov %l3, %g6 +--- a/arch/sparc/prom/console_64.c ++++ b/arch/sparc/prom/console_64.c +@@ -21,14 +21,22 @@ extern int prom_stdin, prom_stdout; + inline int + prom_nbgetchar(void) + { ++ unsigned long args[7]; + char inc; + +- if (p1275_cmd("read", P1275_ARG(1,P1275_ARG_OUT_BUF)| +- P1275_INOUT(3,1), +- prom_stdin, &inc, P1275_SIZE(1)) == 1) ++ args[0] = (unsigned long) "read"; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) prom_stdin; ++ args[4] = (unsigned long) &inc; ++ args[5] = 1; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ if (args[6] == 1) + return inc; +- else +- return -1; ++ return -1; + } + + /* Non blocking put character to console device, returns -1 if +@@ -37,12 +45,22 @@ prom_nbgetchar(void) + inline int + prom_nbputchar(char c) + { ++ unsigned long args[7]; + char outc; + + outc = c; +- if (p1275_cmd("write", P1275_ARG(1,P1275_ARG_IN_BUF)| +- P1275_INOUT(3,1), +- prom_stdout, &outc, P1275_SIZE(1)) == 1) ++ ++ args[0] = (unsigned long) "write"; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) prom_stdout; ++ args[4] = (unsigned long) &outc; ++ args[5] = 1; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ if (args[6] == 1) + return 0; + else + return -1; +@@ -68,7 +86,15 @@ prom_putchar(char c) + void + prom_puts(const char *s, int len) + { +- p1275_cmd("write", P1275_ARG(1,P1275_ARG_IN_BUF)| +- P1275_INOUT(3,1), +- prom_stdout, s, P1275_SIZE(len)); ++ unsigned long args[7]; ++ ++ args[0] = (unsigned long) "write"; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) prom_stdout; ++ args[4] = (unsigned long) s; ++ args[5] = len; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); + } +--- a/arch/sparc/prom/devops_64.c ++++ b/arch/sparc/prom/devops_64.c +@@ -18,16 +18,32 @@ + int + prom_devopen(const char *dstr) + { +- return p1275_cmd ("open", P1275_ARG(0,P1275_ARG_IN_STRING)| +- P1275_INOUT(1,1), +- dstr); ++ unsigned long 
args[5]; ++ ++ args[0] = (unsigned long) "open"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned long) dstr; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[4]; + } + + /* Close the device described by device handle 'dhandle'. */ + int + prom_devclose(int dhandle) + { +- p1275_cmd ("close", P1275_INOUT(1,0), dhandle); ++ unsigned long args[4]; ++ ++ args[0] = (unsigned long) "close"; ++ args[1] = 1; ++ args[2] = 0; ++ args[3] = (unsigned int) dhandle; ++ ++ p1275_cmd_direct(args); ++ + return 0; + } + +@@ -37,5 +53,15 @@ prom_devclose(int dhandle) + void + prom_seek(int dhandle, unsigned int seekhi, unsigned int seeklo) + { +- p1275_cmd ("seek", P1275_INOUT(3,1), dhandle, seekhi, seeklo); ++ unsigned long args[7]; ++ ++ args[0] = (unsigned long) "seek"; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) dhandle; ++ args[4] = seekhi; ++ args[5] = seeklo; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); + } +--- a/arch/sparc/prom/misc_64.c ++++ b/arch/sparc/prom/misc_64.c +@@ -20,10 +20,17 @@ + + int prom_service_exists(const char *service_name) + { +- int err = p1275_cmd("test", P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_INOUT(1, 1), service_name); ++ unsigned long args[5]; + +- if (err) ++ args[0] = (unsigned long) "test"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned long) service_name; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ if (args[4]) + return 0; + return 1; + } +@@ -31,30 +38,47 @@ int prom_service_exists(const char *serv + void prom_sun4v_guest_soft_state(void) + { + const char *svc = "SUNW,soft-state-supported"; ++ unsigned long args[3]; + + if (!prom_service_exists(svc)) + return; +- p1275_cmd(svc, P1275_INOUT(0, 0)); ++ args[0] = (unsigned long) svc; ++ args[1] = 0; ++ args[2] = 0; ++ p1275_cmd_direct(args); + } + + /* Reset and reboot the machine with the command 'bcommand'. 
*/ + void prom_reboot(const char *bcommand) + { ++ unsigned long args[4]; ++ + #ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) + ldom_reboot(bcommand); + #endif +- p1275_cmd("boot", P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_INOUT(1, 0), bcommand); ++ args[0] = (unsigned long) "boot"; ++ args[1] = 1; ++ args[2] = 0; ++ args[3] = (unsigned long) bcommand; ++ ++ p1275_cmd_direct(args); + } + + /* Forth evaluate the expression contained in 'fstring'. */ + void prom_feval(const char *fstring) + { ++ unsigned long args[5]; ++ + if (!fstring || fstring[0] == 0) + return; +- p1275_cmd("interpret", P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_INOUT(1, 1), fstring); ++ args[0] = (unsigned long) "interpret"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned long) fstring; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); + } + EXPORT_SYMBOL(prom_feval); + +@@ -68,6 +92,7 @@ extern void smp_release(void); + */ + void prom_cmdline(void) + { ++ unsigned long args[3]; + unsigned long flags; + + local_irq_save(flags); +@@ -76,7 +101,11 @@ void prom_cmdline(void) + smp_capture(); + #endif + +- p1275_cmd("enter", P1275_INOUT(0, 0)); ++ args[0] = (unsigned long) "enter"; ++ args[1] = 0; ++ args[2] = 0; ++ ++ p1275_cmd_direct(args); + + #ifdef CONFIG_SMP + smp_release(); +@@ -90,22 +119,32 @@ void prom_cmdline(void) + */ + void notrace prom_halt(void) + { ++ unsigned long args[3]; ++ + #ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) + ldom_power_off(); + #endif + again: +- p1275_cmd("exit", P1275_INOUT(0, 0)); ++ args[0] = (unsigned long) "exit"; ++ args[1] = 0; ++ args[2] = 0; ++ p1275_cmd_direct(args); + goto again; /* PROM is out to get me -DaveM */ + } + + void prom_halt_power_off(void) + { ++ unsigned long args[3]; ++ + #ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) + ldom_power_off(); + #endif +- p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0)); ++ args[0] = (unsigned long) "SUNW,power-off"; ++ args[1] = 0; ++ args[2] = 0; ++ 
p1275_cmd_direct(args); + + /* if nothing else helps, we just halt */ + prom_halt(); +@@ -114,10 +153,15 @@ void prom_halt_power_off(void) + /* Set prom sync handler to call function 'funcp'. */ + void prom_setcallback(callback_func_t funcp) + { ++ unsigned long args[5]; + if (!funcp) + return; +- p1275_cmd("set-callback", P1275_ARG(0, P1275_ARG_IN_FUNCTION) | +- P1275_INOUT(1, 1), funcp); ++ args[0] = (unsigned long) "set-callback"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned long) funcp; ++ args[4] = (unsigned long) -1; ++ p1275_cmd_direct(args); + } + + /* Get the idprom and stuff it into buffer 'idbuf'. Returns the +@@ -173,57 +217,61 @@ static int prom_get_memory_ihandle(void) + } + + /* Load explicit I/D TLB entries. */ ++static long tlb_load(const char *type, unsigned long index, ++ unsigned long tte_data, unsigned long vaddr) ++{ ++ unsigned long args[9]; ++ ++ args[0] = (unsigned long) prom_callmethod_name; ++ args[1] = 5; ++ args[2] = 1; ++ args[3] = (unsigned long) type; ++ args[4] = (unsigned int) prom_get_mmu_ihandle(); ++ args[5] = vaddr; ++ args[6] = tte_data; ++ args[7] = index; ++ args[8] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (long) args[8]; ++} ++ + long prom_itlb_load(unsigned long index, + unsigned long tte_data, + unsigned long vaddr) + { +- return p1275_cmd(prom_callmethod_name, +- (P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_ARG(2, P1275_ARG_IN_64B) | +- P1275_ARG(3, P1275_ARG_IN_64B) | +- P1275_INOUT(5, 1)), +- "SUNW,itlb-load", +- prom_get_mmu_ihandle(), +- /* And then our actual args are pushed backwards. 
*/ +- vaddr, +- tte_data, +- index); ++ return tlb_load("SUNW,itlb-load", index, tte_data, vaddr); + } + + long prom_dtlb_load(unsigned long index, + unsigned long tte_data, + unsigned long vaddr) + { +- return p1275_cmd(prom_callmethod_name, +- (P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_ARG(2, P1275_ARG_IN_64B) | +- P1275_ARG(3, P1275_ARG_IN_64B) | +- P1275_INOUT(5, 1)), +- "SUNW,dtlb-load", +- prom_get_mmu_ihandle(), +- /* And then our actual args are pushed backwards. */ +- vaddr, +- tte_data, +- index); ++ return tlb_load("SUNW,dtlb-load", index, tte_data, vaddr); + } + + int prom_map(int mode, unsigned long size, + unsigned long vaddr, unsigned long paddr) + { +- int ret = p1275_cmd(prom_callmethod_name, +- (P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_ARG(3, P1275_ARG_IN_64B) | +- P1275_ARG(4, P1275_ARG_IN_64B) | +- P1275_ARG(6, P1275_ARG_IN_64B) | +- P1275_INOUT(7, 1)), +- prom_map_name, +- prom_get_mmu_ihandle(), +- mode, +- size, +- vaddr, +- 0, +- paddr); ++ unsigned long args[11]; ++ int ret; + ++ args[0] = (unsigned long) prom_callmethod_name; ++ args[1] = 7; ++ args[2] = 1; ++ args[3] = (unsigned long) prom_map_name; ++ args[4] = (unsigned int) prom_get_mmu_ihandle(); ++ args[5] = (unsigned int) mode; ++ args[6] = size; ++ args[7] = vaddr; ++ args[8] = 0; ++ args[9] = paddr; ++ args[10] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ ret = (int) args[10]; + if (ret == 0) + ret = -1; + return ret; +@@ -231,40 +279,51 @@ int prom_map(int mode, unsigned long siz + + void prom_unmap(unsigned long size, unsigned long vaddr) + { +- p1275_cmd(prom_callmethod_name, +- (P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_ARG(2, P1275_ARG_IN_64B) | +- P1275_ARG(3, P1275_ARG_IN_64B) | +- P1275_INOUT(4, 0)), +- prom_unmap_name, +- prom_get_mmu_ihandle(), +- size, +- vaddr); ++ unsigned long args[7]; ++ ++ args[0] = (unsigned long) prom_callmethod_name; ++ args[1] = 4; ++ args[2] = 0; ++ args[3] = (unsigned long) prom_unmap_name; ++ args[4] = (unsigned int) 
prom_get_mmu_ihandle(); ++ args[5] = size; ++ args[6] = vaddr; ++ ++ p1275_cmd_direct(args); + } + + /* Set aside physical memory which is not touched or modified + * across soft resets. + */ +-unsigned long prom_retain(const char *name, +- unsigned long pa_low, unsigned long pa_high, +- long size, long align) +-{ +- /* XXX I don't think we return multiple values correctly. +- * XXX OBP supposedly returns pa_low/pa_high here, how does +- * XXX it work? +- */ ++int prom_retain(const char *name, unsigned long size, ++ unsigned long align, unsigned long *paddr) ++{ ++ unsigned long args[11]; + +- /* If align is zero, the pa_low/pa_high args are passed, +- * else they are not. ++ args[0] = (unsigned long) prom_callmethod_name; ++ args[1] = 5; ++ args[2] = 3; ++ args[3] = (unsigned long) "SUNW,retain"; ++ args[4] = (unsigned int) prom_get_memory_ihandle(); ++ args[5] = align; ++ args[6] = size; ++ args[7] = (unsigned long) name; ++ args[8] = (unsigned long) -1; ++ args[9] = (unsigned long) -1; ++ args[10] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ if (args[8]) ++ return (int) args[8]; ++ ++ /* Next we get "phys_high" then "phys_low". On 64-bit ++ * the phys_high cell is don't care since the phys_low ++ * cell has the full value. 
+ */ +- if (align == 0) +- return p1275_cmd("SUNW,retain", +- (P1275_ARG(0, P1275_ARG_IN_BUF) | P1275_INOUT(5, 2)), +- name, pa_low, pa_high, size, align); +- else +- return p1275_cmd("SUNW,retain", +- (P1275_ARG(0, P1275_ARG_IN_BUF) | P1275_INOUT(3, 2)), +- name, size, align); ++ *paddr = args[10]; ++ ++ return 0; + } + + /* Get "Unumber" string for the SIMM at the given +@@ -277,62 +336,129 @@ int prom_getunumber(int syndrome_code, + unsigned long phys_addr, + char *buf, int buflen) + { +- return p1275_cmd(prom_callmethod_name, +- (P1275_ARG(0, P1275_ARG_IN_STRING) | +- P1275_ARG(3, P1275_ARG_OUT_BUF) | +- P1275_ARG(6, P1275_ARG_IN_64B) | +- P1275_INOUT(8, 2)), +- "SUNW,get-unumber", prom_get_memory_ihandle(), +- buflen, buf, P1275_SIZE(buflen), +- 0, phys_addr, syndrome_code); ++ unsigned long args[12]; ++ ++ args[0] = (unsigned long) prom_callmethod_name; ++ args[1] = 7; ++ args[2] = 2; ++ args[3] = (unsigned long) "SUNW,get-unumber"; ++ args[4] = (unsigned int) prom_get_memory_ihandle(); ++ args[5] = buflen; ++ args[6] = (unsigned long) buf; ++ args[7] = 0; ++ args[8] = phys_addr; ++ args[9] = (unsigned int) syndrome_code; ++ args[10] = (unsigned long) -1; ++ args[11] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[10]; + } + + /* Power management extensions. 
*/ + void prom_sleepself(void) + { +- p1275_cmd("SUNW,sleep-self", P1275_INOUT(0, 0)); ++ unsigned long args[3]; ++ ++ args[0] = (unsigned long) "SUNW,sleep-self"; ++ args[1] = 0; ++ args[2] = 0; ++ p1275_cmd_direct(args); + } + + int prom_sleepsystem(void) + { +- return p1275_cmd("SUNW,sleep-system", P1275_INOUT(0, 1)); ++ unsigned long args[4]; ++ ++ args[0] = (unsigned long) "SUNW,sleep-system"; ++ args[1] = 0; ++ args[2] = 1; ++ args[3] = (unsigned long) -1; ++ p1275_cmd_direct(args); ++ ++ return (int) args[3]; + } + + int prom_wakeupsystem(void) + { +- return p1275_cmd("SUNW,wakeup-system", P1275_INOUT(0, 1)); ++ unsigned long args[4]; ++ ++ args[0] = (unsigned long) "SUNW,wakeup-system"; ++ args[1] = 0; ++ args[2] = 1; ++ args[3] = (unsigned long) -1; ++ p1275_cmd_direct(args); ++ ++ return (int) args[3]; + } + + #ifdef CONFIG_SMP + void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg) + { +- p1275_cmd("SUNW,start-cpu", P1275_INOUT(3, 0), cpunode, pc, arg); ++ unsigned long args[6]; ++ ++ args[0] = (unsigned long) "SUNW,start-cpu"; ++ args[1] = 3; ++ args[2] = 0; ++ args[3] = (unsigned int) cpunode; ++ args[4] = pc; ++ args[5] = arg; ++ p1275_cmd_direct(args); + } + + void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg) + { +- p1275_cmd("SUNW,start-cpu-by-cpuid", P1275_INOUT(3, 0), +- cpuid, pc, arg); ++ unsigned long args[6]; ++ ++ args[0] = (unsigned long) "SUNW,start-cpu-by-cpuid"; ++ args[1] = 3; ++ args[2] = 0; ++ args[3] = (unsigned int) cpuid; ++ args[4] = pc; ++ args[5] = arg; ++ p1275_cmd_direct(args); + } + + void prom_stopcpu_cpuid(int cpuid) + { +- p1275_cmd("SUNW,stop-cpu-by-cpuid", P1275_INOUT(1, 0), +- cpuid); ++ unsigned long args[4]; ++ ++ args[0] = (unsigned long) "SUNW,stop-cpu-by-cpuid"; ++ args[1] = 1; ++ args[2] = 0; ++ args[3] = (unsigned int) cpuid; ++ p1275_cmd_direct(args); + } + + void prom_stopself(void) + { +- p1275_cmd("SUNW,stop-self", P1275_INOUT(0, 0)); ++ unsigned long args[3]; ++ ++ 
args[0] = (unsigned long) "SUNW,stop-self"; ++ args[1] = 0; ++ args[2] = 0; ++ p1275_cmd_direct(args); + } + + void prom_idleself(void) + { +- p1275_cmd("SUNW,idle-self", P1275_INOUT(0, 0)); ++ unsigned long args[3]; ++ ++ args[0] = (unsigned long) "SUNW,idle-self"; ++ args[1] = 0; ++ args[2] = 0; ++ p1275_cmd_direct(args); + } + + void prom_resumecpu(int cpunode) + { +- p1275_cmd("SUNW,resume-cpu", P1275_INOUT(1, 0), cpunode); ++ unsigned long args[4]; ++ ++ args[0] = (unsigned long) "SUNW,resume-cpu"; ++ args[1] = 1; ++ args[2] = 0; ++ args[3] = (unsigned int) cpunode; ++ p1275_cmd_direct(args); + } + #endif +--- a/arch/sparc/prom/p1275.c ++++ b/arch/sparc/prom/p1275.c +@@ -22,13 +22,11 @@ struct { + long prom_callback; /* 0x00 */ + void (*prom_cif_handler)(long *); /* 0x08 */ + unsigned long prom_cif_stack; /* 0x10 */ +- unsigned long prom_args [23]; /* 0x18 */ +- char prom_buffer [3000]; + } p1275buf; + + extern void prom_world(int); + +-extern void prom_cif_interface(void); ++extern void prom_cif_direct(unsigned long *args); + extern void prom_cif_callback(void); + + /* +@@ -36,114 +34,20 @@ extern void prom_cif_callback(void); + */ + DEFINE_SPINLOCK(prom_entry_lock); + +-long p1275_cmd(const char *service, long fmt, ...) 
++void p1275_cmd_direct(unsigned long *args) + { +- char *p, *q; + unsigned long flags; +- int nargs, nrets, i; +- va_list list; +- long attrs, x; +- +- p = p1275buf.prom_buffer; + + raw_local_save_flags(flags); + raw_local_irq_restore(PIL_NMI); + spin_lock(&prom_entry_lock); + +- p1275buf.prom_args[0] = (unsigned long)p; /* service */ +- strcpy (p, service); +- p = (char *)(((long)(strchr (p, 0) + 8)) & ~7); +- p1275buf.prom_args[1] = nargs = (fmt & 0x0f); /* nargs */ +- p1275buf.prom_args[2] = nrets = ((fmt & 0xf0) >> 4); /* nrets */ +- attrs = fmt >> 8; +- va_start(list, fmt); +- for (i = 0; i < nargs; i++, attrs >>= 3) { +- switch (attrs & 0x7) { +- case P1275_ARG_NUMBER: +- p1275buf.prom_args[i + 3] = +- (unsigned)va_arg(list, long); +- break; +- case P1275_ARG_IN_64B: +- p1275buf.prom_args[i + 3] = +- va_arg(list, unsigned long); +- break; +- case P1275_ARG_IN_STRING: +- strcpy (p, va_arg(list, char *)); +- p1275buf.prom_args[i + 3] = (unsigned long)p; +- p = (char *)(((long)(strchr (p, 0) + 8)) & ~7); +- break; +- case P1275_ARG_OUT_BUF: +- (void) va_arg(list, char *); +- p1275buf.prom_args[i + 3] = (unsigned long)p; +- x = va_arg(list, long); +- i++; attrs >>= 3; +- p = (char *)(((long)(p + (int)x + 7)) & ~7); +- p1275buf.prom_args[i + 3] = x; +- break; +- case P1275_ARG_IN_BUF: +- q = va_arg(list, char *); +- p1275buf.prom_args[i + 3] = (unsigned long)p; +- x = va_arg(list, long); +- i++; attrs >>= 3; +- memcpy (p, q, (int)x); +- p = (char *)(((long)(p + (int)x + 7)) & ~7); +- p1275buf.prom_args[i + 3] = x; +- break; +- case P1275_ARG_OUT_32B: +- (void) va_arg(list, char *); +- p1275buf.prom_args[i + 3] = (unsigned long)p; +- p += 32; +- break; +- case P1275_ARG_IN_FUNCTION: +- p1275buf.prom_args[i + 3] = +- (unsigned long)prom_cif_callback; +- p1275buf.prom_callback = va_arg(list, long); +- break; +- } +- } +- va_end(list); +- + prom_world(1); +- prom_cif_interface(); ++ prom_cif_direct(args); + prom_world(0); + +- attrs = fmt >> 8; +- va_start(list, 
fmt); +- for (i = 0; i < nargs; i++, attrs >>= 3) { +- switch (attrs & 0x7) { +- case P1275_ARG_NUMBER: +- (void) va_arg(list, long); +- break; +- case P1275_ARG_IN_STRING: +- (void) va_arg(list, char *); +- break; +- case P1275_ARG_IN_FUNCTION: +- (void) va_arg(list, long); +- break; +- case P1275_ARG_IN_BUF: +- (void) va_arg(list, char *); +- (void) va_arg(list, long); +- i++; attrs >>= 3; +- break; +- case P1275_ARG_OUT_BUF: +- p = va_arg(list, char *); +- x = va_arg(list, long); +- memcpy (p, (char *)(p1275buf.prom_args[i + 3]), (int)x); +- i++; attrs >>= 3; +- break; +- case P1275_ARG_OUT_32B: +- p = va_arg(list, char *); +- memcpy (p, (char *)(p1275buf.prom_args[i + 3]), 32); +- break; +- } +- } +- va_end(list); +- x = p1275buf.prom_args [nargs + 3]; +- + spin_unlock(&prom_entry_lock); + raw_local_irq_restore(flags); +- +- return x; + } + + void prom_cif_init(void *cif_handler, void *cif_stack) +--- a/arch/sparc/prom/tree_64.c ++++ b/arch/sparc/prom/tree_64.c +@@ -16,22 +16,39 @@ + #include + #include + ++static int prom_node_to_node(const char *type, int node) ++{ ++ unsigned long args[5]; ++ ++ args[0] = (unsigned long) type; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[4]; ++} ++ + /* Return the child of node 'node' or zero if no this node has no + * direct descendent. 
+ */ + inline int __prom_getchild(int node) + { +- return p1275_cmd ("child", P1275_INOUT(1, 1), node); ++ return prom_node_to_node("child", node); + } + + inline int prom_getchild(int node) + { + int cnode; + +- if(node == -1) return 0; ++ if (node == -1) ++ return 0; + cnode = __prom_getchild(node); +- if(cnode == -1) return 0; +- return (int)cnode; ++ if (cnode == -1) ++ return 0; ++ return cnode; + } + EXPORT_SYMBOL(prom_getchild); + +@@ -39,10 +56,12 @@ inline int prom_getparent(int node) + { + int cnode; + +- if(node == -1) return 0; +- cnode = p1275_cmd ("parent", P1275_INOUT(1, 1), node); +- if(cnode == -1) return 0; +- return (int)cnode; ++ if (node == -1) ++ return 0; ++ cnode = prom_node_to_node("parent", node); ++ if (cnode == -1) ++ return 0; ++ return cnode; + } + + /* Return the next sibling of node 'node' or zero if no more siblings +@@ -50,7 +69,7 @@ inline int prom_getparent(int node) + */ + inline int __prom_getsibling(int node) + { +- return p1275_cmd(prom_peer_name, P1275_INOUT(1, 1), node); ++ return prom_node_to_node(prom_peer_name, node); + } + + inline int prom_getsibling(int node) +@@ -72,11 +91,21 @@ EXPORT_SYMBOL(prom_getsibling); + */ + inline int prom_getproplen(int node, const char *prop) + { +- if((!node) || (!prop)) return -1; +- return p1275_cmd ("getproplen", +- P1275_ARG(1,P1275_ARG_IN_STRING)| +- P1275_INOUT(2, 1), +- node, prop); ++ unsigned long args[6]; ++ ++ if (!node || !prop) ++ return -1; ++ ++ args[0] = (unsigned long) "getproplen"; ++ args[1] = 2; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = (unsigned long) prop; ++ args[5] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[5]; + } + EXPORT_SYMBOL(prom_getproplen); + +@@ -87,19 +116,25 @@ EXPORT_SYMBOL(prom_getproplen); + inline int prom_getproperty(int node, const char *prop, + char *buffer, int bufsize) + { ++ unsigned long args[8]; + int plen; + + plen = prom_getproplen(node, prop); +- if ((plen > bufsize) || (plen == 0) || 
(plen == -1)) { ++ if ((plen > bufsize) || (plen == 0) || (plen == -1)) + return -1; +- } else { +- /* Ok, things seem all right. */ +- return p1275_cmd(prom_getprop_name, +- P1275_ARG(1,P1275_ARG_IN_STRING)| +- P1275_ARG(2,P1275_ARG_OUT_BUF)| +- P1275_INOUT(4, 1), +- node, prop, buffer, P1275_SIZE(plen)); +- } ++ ++ args[0] = (unsigned long) prom_getprop_name; ++ args[1] = 4; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = (unsigned long) prop; ++ args[5] = (unsigned long) buffer; ++ args[6] = bufsize; ++ args[7] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[7]; + } + EXPORT_SYMBOL(prom_getproperty); + +@@ -110,7 +145,7 @@ inline int prom_getint(int node, const c + { + int intprop; + +- if(prom_getproperty(node, prop, (char *) &intprop, sizeof(int)) != -1) ++ if (prom_getproperty(node, prop, (char *) &intprop, sizeof(int)) != -1) + return intprop; + + return -1; +@@ -126,7 +161,8 @@ int prom_getintdefault(int node, const c + int retval; + + retval = prom_getint(node, property); +- if(retval == -1) return deflt; ++ if (retval == -1) ++ return deflt; + + return retval; + } +@@ -138,7 +174,8 @@ int prom_getbool(int node, const char *p + int retval; + + retval = prom_getproplen(node, prop); +- if(retval == -1) return 0; ++ if (retval == -1) ++ return 0; + return 1; + } + EXPORT_SYMBOL(prom_getbool); +@@ -152,7 +189,8 @@ void prom_getstring(int node, const char + int len; + + len = prom_getproperty(node, prop, user_buf, ubuf_size); +- if(len != -1) return; ++ if (len != -1) ++ return; + user_buf[0] = 0; + return; + } +@@ -165,7 +203,8 @@ int prom_nodematch(int node, const char + { + char namebuf[128]; + prom_getproperty(node, "name", namebuf, sizeof(namebuf)); +- if(strcmp(namebuf, name) == 0) return 1; ++ if (strcmp(namebuf, name) == 0) ++ return 1; + return 0; + } + +@@ -191,16 +230,29 @@ int prom_searchsiblings(int node_start, + } + EXPORT_SYMBOL(prom_searchsiblings); + ++static const char *prom_nextprop_name = 
"nextprop"; ++ + /* Return the first property type for node 'node'. + * buffer should be at least 32B in length + */ + inline char *prom_firstprop(int node, char *buffer) + { ++ unsigned long args[7]; ++ + *buffer = 0; +- if(node == -1) return buffer; +- p1275_cmd ("nextprop", P1275_ARG(2,P1275_ARG_OUT_32B)| +- P1275_INOUT(3, 0), +- node, (char *) 0x0, buffer); ++ if (node == -1) ++ return buffer; ++ ++ args[0] = (unsigned long) prom_nextprop_name; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = 0; ++ args[5] = (unsigned long) buffer; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ + return buffer; + } + EXPORT_SYMBOL(prom_firstprop); +@@ -211,9 +263,10 @@ EXPORT_SYMBOL(prom_firstprop); + */ + inline char *prom_nextprop(int node, const char *oprop, char *buffer) + { ++ unsigned long args[7]; + char buf[32]; + +- if(node == -1) { ++ if (node == -1) { + *buffer = 0; + return buffer; + } +@@ -221,10 +274,17 @@ inline char *prom_nextprop(int node, con + strcpy (buf, oprop); + oprop = buf; + } +- p1275_cmd ("nextprop", P1275_ARG(1,P1275_ARG_IN_STRING)| +- P1275_ARG(2,P1275_ARG_OUT_32B)| +- P1275_INOUT(3, 0), +- node, oprop, buffer); ++ ++ args[0] = (unsigned long) prom_nextprop_name; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = (unsigned long) oprop; ++ args[5] = (unsigned long) buffer; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ + return buffer; + } + EXPORT_SYMBOL(prom_nextprop); +@@ -232,12 +292,19 @@ EXPORT_SYMBOL(prom_nextprop); + int + prom_finddevice(const char *name) + { ++ unsigned long args[5]; ++ + if (!name) + return 0; +- return p1275_cmd(prom_finddev_name, +- P1275_ARG(0,P1275_ARG_IN_STRING)| +- P1275_INOUT(1, 1), +- name); ++ args[0] = (unsigned long) "finddevice"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned long) name; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[4]; + } + 
EXPORT_SYMBOL(prom_finddevice); + +@@ -248,7 +315,7 @@ int prom_node_has_property(int node, con + *buf = 0; + do { + prom_nextprop(node, buf, buf); +- if(!strcmp(buf, prop)) ++ if (!strcmp(buf, prop)) + return 1; + } while (*buf); + return 0; +@@ -261,6 +328,8 @@ EXPORT_SYMBOL(prom_node_has_property); + int + prom_setprop(int node, const char *pname, char *value, int size) + { ++ unsigned long args[8]; ++ + if (size == 0) + return 0; + if ((pname == 0) || (value == 0)) +@@ -272,19 +341,37 @@ prom_setprop(int node, const char *pname + return 0; + } + #endif +- return p1275_cmd ("setprop", P1275_ARG(1,P1275_ARG_IN_STRING)| +- P1275_ARG(2,P1275_ARG_IN_BUF)| +- P1275_INOUT(4, 1), +- node, pname, value, P1275_SIZE(size)); ++ args[0] = (unsigned long) "setprop"; ++ args[1] = 4; ++ args[2] = 1; ++ args[3] = (unsigned int) node; ++ args[4] = (unsigned long) pname; ++ args[5] = (unsigned long) value; ++ args[6] = size; ++ args[7] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[7]; + } + EXPORT_SYMBOL(prom_setprop); + + inline int prom_inst2pkg(int inst) + { ++ unsigned long args[5]; + int node; + +- node = p1275_cmd ("instance-to-package", P1275_INOUT(1, 1), inst); +- if (node == -1) return 0; ++ args[0] = (unsigned long) "instance-to-package"; ++ args[1] = 1; ++ args[2] = 1; ++ args[3] = (unsigned int) inst; ++ args[4] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ node = (int) args[4]; ++ if (node == -1) ++ return 0; + return node; + } + +@@ -297,17 +384,28 @@ prom_pathtoinode(const char *path) + int node, inst; + + inst = prom_devopen (path); +- if (inst == 0) return 0; +- node = prom_inst2pkg (inst); +- prom_devclose (inst); +- if (node == -1) return 0; ++ if (inst == 0) ++ return 0; ++ node = prom_inst2pkg(inst); ++ prom_devclose(inst); ++ if (node == -1) ++ return 0; + return node; + } + + int prom_ihandle2path(int handle, char *buffer, int bufsize) + { +- return p1275_cmd("instance-to-path", +- P1275_ARG(1,P1275_ARG_OUT_BUF)| +- 
P1275_INOUT(3, 1), +- handle, buffer, P1275_SIZE(bufsize)); ++ unsigned long args[7]; ++ ++ args[0] = (unsigned long) "instance-to-path"; ++ args[1] = 3; ++ args[2] = 1; ++ args[3] = (unsigned int) handle; ++ args[4] = (unsigned long) buffer; ++ args[5] = bufsize; ++ args[6] = (unsigned long) -1; ++ ++ p1275_cmd_direct(args); ++ ++ return (int) args[6]; + } diff --git a/queue-2.6.32/tcp-combat-per-cpu-skew-in-orphan-tests.patch b/queue-2.6.32/tcp-combat-per-cpu-skew-in-orphan-tests.patch new file mode 100644 index 00000000000..05cda521020 --- /dev/null +++ b/queue-2.6.32/tcp-combat-per-cpu-skew-in-orphan-tests.patch @@ -0,0 +1,97 @@ +From 7c11c309aae7ef96e223eac6cdf03b35105cade5 Mon Sep 17 00:00:00 2001 +From: David S. Miller +Date: Wed, 25 Aug 2010 02:27:49 -0700 +Subject: tcp: Combat per-cpu skew in orphan tests. + + +From: David S. Miller + +[ Upstream commit ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 ] + +As reported by Anton Blanchard when we use +percpu_counter_read_positive() to make our orphan socket limit checks, +the check can be off by up to num_cpus_online() * batch (which is 32 +by default) which on a 128 cpu machine can be as large as the default +orphan limit itself. + +Fix this by doing the full expensive sum check if the optimized check +triggers. + +Reported-by: Anton Blanchard +Signed-off-by: David S. 
Miller +Acked-by: Eric Dumazet +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 18 ++++++++++++++---- + net/ipv4/tcp.c | 5 +---- + net/ipv4/tcp_timer.c | 8 ++++---- + 3 files changed, 19 insertions(+), 12 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -259,11 +259,21 @@ static inline int between(__u32 seq1, __ + return seq3 - seq2 >= seq1 - seq2; + } + +-static inline int tcp_too_many_orphans(struct sock *sk, int num) ++static inline bool tcp_too_many_orphans(struct sock *sk, int shift) + { +- return (num > sysctl_tcp_max_orphans) || +- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && +- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]); ++ struct percpu_counter *ocp = sk->sk_prot->orphan_count; ++ int orphans = percpu_counter_read_positive(ocp); ++ ++ if (orphans << shift > sysctl_tcp_max_orphans) { ++ orphans = percpu_counter_sum_positive(ocp); ++ if (orphans << shift > sysctl_tcp_max_orphans) ++ return true; ++ } ++ ++ if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && ++ atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) ++ return true; ++ return false; + } + + /* syncookies: remember time of last synqueue overflow */ +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1976,11 +1976,8 @@ adjudge_to_death: + } + } + if (sk->sk_state != TCP_CLOSE) { +- int orphan_count = percpu_counter_read_positive( +- sk->sk_prot->orphan_count); +- + sk_mem_reclaim(sk); +- if (tcp_too_many_orphans(sk, orphan_count)) { ++ if (tcp_too_many_orphans(sk, 0)) { + if (net_ratelimit()) + printk(KERN_INFO "TCP: too many of orphaned " + "sockets\n"); +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -65,18 +65,18 @@ static void tcp_write_err(struct sock *s + static int tcp_out_of_resources(struct sock *sk, int do_reset) + { + struct tcp_sock *tp = tcp_sk(sk); +- int orphans = percpu_counter_read_positive(&tcp_orphan_count); ++ int shift = 0; + + /* If peer does not open window for long time, or did not transmit + * anything for long time, penalize 
it. */ + if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) +- orphans <<= 1; ++ shift++; + + /* If some dubious ICMP arrived, penalize even more. */ + if (sk->sk_err_soft) +- orphans <<= 1; ++ shift++; + +- if (tcp_too_many_orphans(sk, orphans)) { ++ if (tcp_too_many_orphans(sk, shift)) { + if (net_ratelimit()) + printk(KERN_INFO "Out of socket memory\n"); + diff --git a/queue-2.6.32/tcp-fix-three-tcp-sysctls-tuning.patch b/queue-2.6.32/tcp-fix-three-tcp-sysctls-tuning.patch new file mode 100644 index 00000000000..3db97bf5b85 --- /dev/null +++ b/queue-2.6.32/tcp-fix-three-tcp-sysctls-tuning.patch @@ -0,0 +1,76 @@ +From cf69069853065815fba61d3584ea708258dfaed6 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Sun, 19 Sep 2010 21:38:12 -0700 +Subject: tcp: fix three tcp sysctls tuning + + +From: Eric Dumazet + +[ Upstream commit c5ed63d66f24fd4f7089b5a6e087b0ce7202aa8e ] + +As discovered by Anton Blanchard, current code to autotune +tcp_death_row.sysctl_max_tw_buckets, sysctl_tcp_max_orphans and +sysctl_max_syn_backlog makes little sense. + +The bigger a page is, the less tcp_max_orphans is : 4096 on a 512GB +machine in Anton's case. + +(tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)) +is much bigger if spinlock debugging is on. Its wrong to select bigger +limits in this case (where kernel structures are also bigger) + +bhash_size max is 65536, and we get this value even for small machines. + +A better ground is to use size of ehash table, this also makes code +shorter and more obvious. + +Based on a patch from Anton, and another from David. + +Reported-and-tested-by: Anton Blanchard +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 24 +++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2878,7 +2878,7 @@ void __init tcp_init(void) + { + struct sk_buff *skb = NULL; + unsigned long nr_pages, limit; +- int order, i, max_share; ++ int i, max_share, cnt; + + BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); + +@@ -2927,22 +2927,12 @@ void __init tcp_init(void) + INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); + } + +- /* Try to be a bit smarter and adjust defaults depending +- * on available memory. +- */ +- for (order = 0; ((1 << order) << PAGE_SHIFT) < +- (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); +- order++) +- ; +- if (order >= 4) { +- tcp_death_row.sysctl_max_tw_buckets = 180000; +- sysctl_tcp_max_orphans = 4096 << (order - 4); +- sysctl_max_syn_backlog = 1024; +- } else if (order < 3) { +- tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); +- sysctl_tcp_max_orphans >>= (3 - order); +- sysctl_max_syn_backlog = 128; +- } ++ ++ cnt = tcp_hashinfo.ehash_size; ++ ++ tcp_death_row.sysctl_max_tw_buckets = cnt / 2; ++ sysctl_tcp_max_orphans = cnt / 2; ++ sysctl_max_syn_backlog = max(128, cnt / 256); + + /* Set the pressure threshold to be a fraction of global memory that + * is up to 1/2 at 256 MB, decreasing toward zero with the amount of diff --git a/queue-2.6.32/tcp-prevent-overzealous-packetization-by-sws-logic.patch b/queue-2.6.32/tcp-prevent-overzealous-packetization-by-sws-logic.patch new file mode 100644 index 00000000000..7e706ab0483 --- /dev/null +++ b/queue-2.6.32/tcp-prevent-overzealous-packetization-by-sws-logic.patch @@ -0,0 +1,56 @@ +From ef640efee8b476c5c600854ce26891d68f6b4d05 Mon Sep 17 00:00:00 2001 +From: Alexey Kuznetsov +Date: Wed, 15 Sep 2010 10:27:52 -0700 +Subject: tcp: Prevent overzealous packetization by SWS logic. 
+ + +From: Alexey Kuznetsov + +[ Upstream commit 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 ] + +If peer uses tiny MSS (say, 75 bytes) and similarly tiny advertised +window, the SWS logic will packetize to half the MSS unnecessarily. + +This causes problems with some embedded devices. + +However for large MSS devices we do want to half-MSS packetize +otherwise we never get enough packets into the pipe for things +like fast retransmit and recovery to work. + +Be careful also to handle the case where MSS > window, otherwise +we'll never send until the probe timer. + +Reported-by: ツ Leandro Melo de Sales +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -511,8 +511,22 @@ extern unsigned int tcp_current_mss(stru + /* Bound MSS / TSO packet size with the half of the window */ + static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) + { +- if (tp->max_window && pktsize > (tp->max_window >> 1)) +- return max(tp->max_window >> 1, 68U - tp->tcp_header_len); ++ int cutoff; ++ ++ /* When peer uses tiny windows, there is no use in packetizing ++ * to sub-MSS pieces for the sake of SWS or making sure there ++ * are enough packets in the pipe for fast recovery. ++ * ++ * On the other hand, for extremely large MSS devices, handling ++ * smaller than MSS windows in this way does make sense. 
++ */ ++ if (tp->max_window >= 512) ++ cutoff = (tp->max_window >> 1); ++ else ++ cutoff = tp->max_window; ++ ++ if (cutoff && pktsize > cutoff) ++ return max_t(int, cutoff, 68U - tp->tcp_header_len); + else + return pktsize; + } diff --git a/queue-2.6.32/tcp-select-writefds-don-t-hang-up-when-a-peer-close-connection.patch b/queue-2.6.32/tcp-select-writefds-don-t-hang-up-when-a-peer-close-connection.patch new file mode 100644 index 00000000000..6bdb1cb28be --- /dev/null +++ b/queue-2.6.32/tcp-select-writefds-don-t-hang-up-when-a-peer-close-connection.patch @@ -0,0 +1,72 @@ +From 5a21a10b5f11e50237ab5a264deb0d5e55ba90fb Mon Sep 17 00:00:00 2001 +From: KOSAKI Motohiro +Date: Tue, 24 Aug 2010 16:05:48 +0000 +Subject: tcp: select(writefds) don't hang up when a peer close connection + + +From: KOSAKI Motohiro + +[ Upstream commit d84ba638e4ba3c40023ff997aa5e8d3ed002af36 ] + +This issue come from ruby language community. Below test program +hang up when only run on Linux. + + % uname -mrsv + Linux 2.6.26-2-486 #1 Sat Dec 26 08:37:39 UTC 2009 i686 + % ruby -rsocket -ve ' + BasicSocket.do_not_reverse_lookup = true + serv = TCPServer.open("127.0.0.1", 0) + s1 = TCPSocket.open("127.0.0.1", serv.addr[1]) + s2 = serv.accept + s2.close + s1.write("a") rescue p $! + s1.write("a") rescue p $! + Thread.new { + s1.write("a") + }.join' + ruby 1.9.3dev (2010-07-06 trunk 28554) [i686-linux] + # + [Hang Here] + +FreeBSD, Solaris, Mac doesn't. because Ruby's write() method call +select() internally. and tcp_poll has a bug. + +SUS defined 'ready for writing' of select() as following. + +| A descriptor shall be considered ready for writing when a call to an output +| function with O_NONBLOCK clear would not block, whether or not the function +| would transfer data successfully. + +That said, EPIPE situation is clearly one of 'ready for writing'. + +We don't have read-side issue because tcp_poll() already has read side +shutdown care. 
+ +| if (sk->sk_shutdown & RCV_SHUTDOWN) +| mask |= POLLIN | POLLRDNORM | POLLRDHUP; + +So, let's insert the same logic on the write side. + +- reference url + http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31065 + http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31068 + +Signed-off-by: KOSAKI Motohiro +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -451,7 +451,8 @@ unsigned int tcp_poll(struct file *file, + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) + mask |= POLLOUT | POLLWRNORM; + } +- } ++ } else ++ mask |= POLLOUT | POLLWRNORM; + + if (tp->urg_data & TCP_URG_VALID) + mask |= POLLPRI; diff --git a/queue-2.6.32/unix-do-not-loop-forever-at-unix_autobind.patch b/queue-2.6.32/unix-do-not-loop-forever-at-unix_autobind.patch new file mode 100644 index 00000000000..788ca7aad09 --- /dev/null +++ b/queue-2.6.32/unix-do-not-loop-forever-at-unix_autobind.patch @@ -0,0 +1,70 @@ +From 28c2b2d688b98135d776d988b9bfcd4e935b17ac Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa +Date: Sat, 4 Sep 2010 01:34:28 +0000 +Subject: UNIX: Do not loop forever at unix_autobind(). + + +From: Tetsuo Handa + +[ Upstream commit a9117426d0fcc05a194f728159a2d43df43c7add ] + +We assumed that unix_autobind() never fails if kzalloc() succeeded. +But unix_autobind() allows only 1048576 names. If /proc/sys/fs/file-max is +larger than 1048576 (e.g. systems with more than 10GB of RAM), a local user can +consume all names using fork()/socket()/bind(). + +If all names are in use, those who call bind() with addr_len == sizeof(short) +or connect()/sendmsg() with setsockopt(SO_PASSCRED) will keep spinning in a + + while (1) + yield(); + +loop in unix_autobind() until a name becomes available. +This patch adds a loop counter in order to give up after 1048576 attempts.
+ +Calling yield() once per 256 attempts may not be sufficient when many names +are already in use, for __unix_find_socket_byname() can take a long time under +such circumstances. Therefore, this patch also adds a cond_resched() call. + +Note that currently a local user can consume 2GB of kernel memory if the user +is allowed to create and autobind 1048576 UNIX domain sockets. We should +consider adding some restrictions on the autobind operation. + +Signed-off-by: Tetsuo Handa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -671,6 +671,7 @@ static int unix_autobind(struct socket * + static u32 ordernum = 1; + struct unix_address *addr; + int err; ++ unsigned int retries = 0; + + mutex_lock(&u->readlock); + +@@ -696,9 +697,17 @@ retry: + if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, + addr->hash)) { + spin_unlock(&unix_table_lock); +- /* Sanity yield. It is unusual case, but yet... */ +- if (!(ordernum&0xFF)) +- yield(); ++ /* ++ * __unix_find_socket_byname() may take long time if many names ++ * are already in use. ++ */ ++ cond_resched(); ++ /* Give up if all names seems to be in use.
*/ ++ if (retries++ == 0xFFFFF) { ++ err = -ENOSPC; ++ kfree(addr); ++ goto out; ++ } + goto retry; + } + addr->hash ^= sk->sk_type; diff --git a/queue-2.6.32/usb-serial-mos-prevent-reading-uninitialized-stack-memory.patch b/queue-2.6.32/usb-serial-mos-prevent-reading-uninitialized-stack-memory.patch new file mode 100644 index 00000000000..920f2f44cc8 --- /dev/null +++ b/queue-2.6.32/usb-serial-mos-prevent-reading-uninitialized-stack-memory.patch @@ -0,0 +1,47 @@ +From a0846f1868b11cd827bdfeaf4527d8b1b1c0b098 Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg +Date: Wed, 15 Sep 2010 17:44:16 -0400 +Subject: USB: serial/mos*: prevent reading uninitialized stack memory + +From: Dan Rosenberg + +commit a0846f1868b11cd827bdfeaf4527d8b1b1c0b098 upstream. + +The TIOCGICOUNT device ioctl in both mos7720.c and mos7840.c allows +unprivileged users to read uninitialized stack memory, because the +"reserved" member of the serial_icounter_struct struct declared on the +stack is not altered or zeroed before being copied back to the user. +This patch takes care of it. + +Signed-off-by: Dan Rosenberg +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/serial/mos7720.c | 3 +++ + drivers/usb/serial/mos7840.c | 3 +++ + 2 files changed, 6 insertions(+) + +--- a/drivers/usb/serial/mos7720.c ++++ b/drivers/usb/serial/mos7720.c +@@ -1466,6 +1466,9 @@ static int mos7720_ioctl(struct tty_stru + + case TIOCGICOUNT: + cnow = mos7720_port->icount; ++ ++ memset(&icount, 0, sizeof(struct serial_icounter_struct)); ++ + icount.cts = cnow.cts; + icount.dsr = cnow.dsr; + icount.rng = cnow.rng; +--- a/drivers/usb/serial/mos7840.c ++++ b/drivers/usb/serial/mos7840.c +@@ -2287,6 +2287,9 @@ static int mos7840_ioctl(struct tty_stru + case TIOCGICOUNT: + cnow = mos7840_port->icount; + smp_rmb(); ++ ++ memset(&icount, 0, sizeof(struct serial_icounter_struct)); ++ + icount.cts = cnow.cts; + icount.dsr = cnow.dsr; + icount.rng = cnow.rng;