--- /dev/null
+From ab12811c89e88f2e66746790b1fe4469ccb7bdd9 Mon Sep 17 00:00:00 2001
+From: Andy Gospodarek <andy@greyhouse.net>
+Date: Fri, 10 Sep 2010 11:43:20 +0000
+Subject: bonding: correctly process non-linear skbs
+
+From: Andy Gospodarek <andy@greyhouse.net>
+
+commit ab12811c89e88f2e66746790b1fe4469ccb7bdd9 upstream.
+
+It was recently brought to my attention that 802.3ad mode bonds would no
+longer form when using some network hardware after a driver update.
+After snooping around I realized that the particular hardware was using
+page-based skbs and found that skb->data did not contain a valid LACPDU
+as it was not stored there. That explained the inability to form an
+802.3ad-based bond. For balance-alb mode bonds this was also an issue
+as ARPs would not be properly processed.
+
+This patch fixes the issue in my tests and should be applied to 2.6.36
+and as far back as anyone cares to add it to stable.
+
+Thanks to Alexander Duyck <alexander.h.duyck@intel.com> and Jesse
+Brandeburg <jesse.brandeburg@intel.com> for the suggestions on this one.
+
+Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
+CC: Alexander Duyck <alexander.h.duyck@intel.com>
+CC: Jesse Brandeburg <jesse.brandeburg@intel.com>
+Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/bonding/bond_3ad.c | 3 +++
+ drivers/net/bonding/bond_alb.c | 3 +++
+ 2 files changed, 6 insertions(+)
+
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -2466,6 +2466,9 @@ int bond_3ad_lacpdu_recv(struct sk_buff
+ if (!(dev->flags & IFF_MASTER))
+ goto out;
+
++ if (!pskb_may_pull(skb, sizeof(struct lacpdu)))
++ goto out;
++
+ read_lock(&bond->lock);
+ slave = bond_get_slave_by_dev((struct bonding *)netdev_priv(dev),
+ orig_dev);
+--- a/drivers/net/bonding/bond_alb.c
++++ b/drivers/net/bonding/bond_alb.c
+@@ -369,6 +369,9 @@ static int rlb_arp_recv(struct sk_buff *
+ goto out;
+ }
+
++ if (!pskb_may_pull(skb, arp_hdr_len(bond_dev)))
++ goto out;
++
+ if (skb->len < sizeof(struct arp_pkt)) {
+ pr_debug("Packet is too small to be an ARP\n");
+ goto out;
--- /dev/null
+From 29b17c3b0487c9d05affb9c50c76508fd73a7a63 Mon Sep 17 00:00:00 2001
+From: David S. Miller <davem@davemloft.net>
+Date: Wed, 1 Sep 2010 18:06:39 -0700
+Subject: bridge: Clear INET control block of SKBs passed into ip_fragment().
+
+
+From: David S. Miller <davem@davemloft.net>
+
+[ Upstream commit 4ce6b9e1621c187a32a47a17bf6be93b1dc4a3df ]
+
+In a similar vein to commit 17762060c25590bfddd68cc1131f28ec720f405f
+("bridge: Clear IPCB before possible entry into IP stack")
+
+Any time we call into the IP stack we have to make sure the state
+there is as expected by the ipv4 code.
+
+With help from Eric Dumazet and Herbert Xu.
+
+Reported-by: Brandan Das <brandan.das@stratus.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/bridge/br_netfilter.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/bridge/br_netfilter.c
++++ b/net/bridge/br_netfilter.c
+@@ -749,9 +749,11 @@ static int br_nf_dev_queue_xmit(struct s
+ {
+ if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
+ skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
+- !skb_is_gso(skb))
++ !skb_is_gso(skb)) {
++ /* BUG: Should really parse the IP options here. */
++ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+ return ip_fragment(skb, br_dev_queue_push_xmit);
+- else
++ } else
+ return br_dev_queue_push_xmit(skb);
+ }
+ #else
--- /dev/null
+From 49c37c0334a9b85d30ab3d6b5d1acb05ef2ef6de Mon Sep 17 00:00:00 2001
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+Date: Wed, 15 Sep 2010 11:43:12 +0000
+Subject: drivers/net/cxgb3/cxgb3_main.c: prevent reading uninitialized stack memory
+
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+
+commit 49c37c0334a9b85d30ab3d6b5d1acb05ef2ef6de upstream.
+
+Fixed formatting (tabs and line breaks).
+
+The CHELSIO_GET_QSET_NUM device ioctl allows unprivileged users to read
+4 bytes of uninitialized stack memory, because the "addr" member of the
+ch_reg struct declared on the stack in cxgb_extension_ioctl() is not
+altered or zeroed before being copied back to the user. This patch
+takes care of it.
+
+Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/cxgb3/cxgb3_main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/cxgb3/cxgb3_main.c
++++ b/drivers/net/cxgb3/cxgb3_main.c
+@@ -2296,6 +2296,8 @@ static int cxgb_extension_ioctl(struct n
+ case CHELSIO_GET_QSET_NUM:{
+ struct ch_reg edata;
+
++ memset(&edata, 0, sizeof(struct ch_reg));
++
+ edata.cmd = CHELSIO_GET_QSET_NUM;
+ edata.val = pi->nqsets;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
--- /dev/null
+From 44467187dc22fdd33a1a06ea0ba86ce20be3fe3c Mon Sep 17 00:00:00 2001
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+Date: Wed, 15 Sep 2010 11:43:04 +0000
+Subject: drivers/net/eql.c: prevent reading uninitialized stack memory
+
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+
+commit 44467187dc22fdd33a1a06ea0ba86ce20be3fe3c upstream.
+
+Fixed formatting (tabs and line breaks).
+
+The EQL_GETMASTRCFG device ioctl allows unprivileged users to read 16
+bytes of uninitialized stack memory, because the "master_name" member of
+the master_config_t struct declared on the stack in eql_g_master_cfg()
+is not altered or zeroed before being copied back to the user. This
+patch takes care of it.
+
+Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/eql.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/eql.c
++++ b/drivers/net/eql.c
+@@ -555,6 +555,8 @@ static int eql_g_master_cfg(struct net_d
+ equalizer_t *eql;
+ master_config_t mc;
+
++ memset(&mc, 0, sizeof(master_config_t));
++
+ if (eql_is_master(dev)) {
+ eql = netdev_priv(dev);
+ mc.max_slaves = eql->max_slaves;
--- /dev/null
+From 7011e660938fc44ed86319c18a5954e95a82ab3e Mon Sep 17 00:00:00 2001
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+Date: Wed, 15 Sep 2010 11:43:28 +0000
+Subject: drivers/net/usb/hso.c: prevent reading uninitialized memory
+
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+
+commit 7011e660938fc44ed86319c18a5954e95a82ab3e upstream.
+
+Fixed formatting (tabs and line breaks).
+
+The TIOCGICOUNT device ioctl allows unprivileged users to read
+uninitialized stack memory, because the "reserved" member of the
+serial_icounter_struct struct declared on the stack in hso_get_count()
+is not altered or zeroed before being copied back to the user. This
+patch takes care of it.
+
+Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/usb/hso.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/usb/hso.c
++++ b/drivers/net/usb/hso.c
+@@ -1653,6 +1653,8 @@ static int hso_get_count(struct hso_seri
+ struct uart_icount cnow;
+ struct hso_tiocmget *tiocmget = serial->tiocmget;
+
++ memset(&icount, 0, sizeof(struct serial_icounter_struct));
++
+ if (!tiocmget)
+ return -ENOENT;
+ spin_lock_irq(&serial->serial_lock);
--- /dev/null
+From 1006c52235c334fed26dac15fd13a8e9b79d2845 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Wed, 1 Sep 2010 00:50:51 +0000
+Subject: gro: fix different skb headrooms
+
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+[ Upstream commit 3d3be4333fdf6faa080947b331a6a19bce1a4f57 ]
+
+Packets entering GRO might have different headrooms, even for a given
+flow (because of implementation details in drivers, like copybreak).
+We can't force drivers to deliver packets with a fixed headroom.
+
+1) fix skb_segment()
+
+skb_segment() falsely assumes that the headrooms of fragments are the same
+as that of the head. When CHECKSUM_PARTIAL is used, this can give csum_start
+errors, and crash later in skb_copy_and_csum_dev()
+
+2) allocate a minimal skb for head of frag_list
+
+skb_gro_receive() uses netdev_alloc_skb(headroom + skb_gro_offset(p)) to
+allocate a fresh skb. This adds NET_SKB_PAD to a padding already
+provided by netdevice, depending on various things, like copybreak.
+
+Use alloc_skb() to allocate an exact padding, to reduce cache line
+needs:
+NET_SKB_PAD + NET_IP_ALIGN
+
+bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=16626
+
+Many thanks to Plamen Petrov, testing many debugging patches !
+With help of Jarek Poplawski.
+
+Reported-by: Plamen Petrov <pvp-lsts@fs.uni-ruse.bg>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+CC: Jarek Poplawski <jarkao2@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/core/skbuff.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -2574,6 +2574,10 @@ struct sk_buff *skb_segment(struct sk_bu
+ __copy_skb_header(nskb, skb);
+ nskb->mac_len = skb->mac_len;
+
++ /* nskb and skb might have different headroom */
++ if (nskb->ip_summed == CHECKSUM_PARTIAL)
++ nskb->csum_start += skb_headroom(nskb) - headroom;
++
+ skb_reset_mac_header(nskb);
+ skb_set_network_header(nskb, skb->mac_len);
+ nskb->transport_header = (nskb->network_header +
+@@ -2703,8 +2707,8 @@ int skb_gro_receive(struct sk_buff **hea
+ } else if (skb_gro_len(p) != pinfo->gso_size)
+ return -E2BIG;
+
+- headroom = skb_headroom(p);
+- nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
++ headroom = NET_SKB_PAD + NET_IP_ALIGN;
++ nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);
+ if (unlikely(!nskb))
+ return -ENOMEM;
+
--- /dev/null
+From b9dd9f07c077ce7410d2f5bd74fb3db8a9cd07fd Mon Sep 17 00:00:00 2001
+From: Jarek Poplawski <jarkao2@gmail.com>
+Date: Sat, 4 Sep 2010 10:34:29 +0000
+Subject: gro: Re-fix different skb headrooms
+
+
+From: Jarek Poplawski <jarkao2@gmail.com>
+
+[ Upstream commit 64289c8e6851bca0e589e064c9a5c9fbd6ae5dd4 ]
+
+The patch: "gro: fix different skb headrooms" in its part:
+"2) allocate a minimal skb for head of frag_list" is buggy. The copied
+skb has p->data set at the ip header at the moment, and skb_gro_offset
+is the length of ip + tcp headers. So, after the change the length of
+mac header is skipped. Later skb_set_mac_header() sets it into the
+NET_SKB_PAD area (if it's long enough) and ip header is misaligned at
+NET_SKB_PAD + NET_IP_ALIGN offset. There is no reason to assume the
+original skb was wrongly allocated, so let's copy it as it was.
+
+bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=16626
+fixes commit: 3d3be4333fdf6faa080947b331a6a19bce1a4f57
+
+Reported-by: Plamen Petrov <pvp-lsts@fs.uni-ruse.bg>
+Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
+CC: Eric Dumazet <eric.dumazet@gmail.com>
+Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
+Tested-by: Plamen Petrov <pvp-lsts@fs.uni-ruse.bg>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/core/skbuff.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -2707,7 +2707,7 @@ int skb_gro_receive(struct sk_buff **hea
+ } else if (skb_gro_len(p) != pinfo->gso_size)
+ return -E2BIG;
+
+- headroom = NET_SKB_PAD + NET_IP_ALIGN;
++ headroom = skb_headroom(p);
+ nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);
+ if (unlikely(!nskb))
+ return -ENOMEM;
--- /dev/null
+From 192fa287ebdf9d4acaae6197fb8205452a159ccb Mon Sep 17 00:00:00 2001
+From: David S. Miller <davem@davemloft.net>
+Date: Mon, 30 Aug 2010 18:35:24 -0700
+Subject: irda: Correctly clean up self->ias_obj on irda_bind() failure.
+
+
+From: David S. Miller <davem@davemloft.net>
+
+[ Upstream commit 628e300cccaa628d8fb92aa28cb7530a3d5f2257 ]
+
+If irda_open_tsap() fails, the irda_bind() code tries to destroy
+the ->ias_obj object by hand, but does so wrongly.
+
+In particular, it fails to a) release the hashbin attached to the
+object and b) reset the self->ias_obj pointer to NULL.
+
+Fix both problems by using irias_delete_object() and explicitly
+setting self->ias_obj to NULL, just as irda_release() does.
+
+Reported-by: Tavis Ormandy <taviso@cmpxchg8b.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/irda/af_irda.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/irda/af_irda.c
++++ b/net/irda/af_irda.c
+@@ -824,8 +824,8 @@ static int irda_bind(struct socket *sock
+
+ err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name);
+ if (err < 0) {
+- kfree(self->ias_obj->name);
+- kfree(self->ias_obj);
++ irias_delete_object(self->ias_obj);
++ self->ias_obj = NULL;
+ goto out;
+ }
+
--- /dev/null
+From c1debf1b013eabe9852ef9a61f51d99fbfed6e6a Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Wed, 25 Aug 2010 23:44:35 +0000
+Subject: l2tp: test for ethernet header in l2tp_eth_dev_recv()
+
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+[ Upstream commit bfc960a8eec023a170a80697fe65157cd4f44f81 ]
+
+close https://bugzilla.kernel.org/show_bug.cgi?id=16529
+
+Before calling dev_forward_skb(), we should make sure skb head contains
+at least an ethernet header, even if length included in upper layer said
+so. Use pskb_may_pull() to make sure this ethernet header is present in
+skb head.
+
+Reported-by: Thomas Heil <heil@terminal-consulting.de>
+Reported-by: Ian Campbell <Ian.Campbell@eu.citrix.com>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/l2tp/l2tp_eth.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/l2tp/l2tp_eth.c
++++ b/net/l2tp/l2tp_eth.c
+@@ -132,7 +132,7 @@ static void l2tp_eth_dev_recv(struct l2t
+ printk("\n");
+ }
+
+- if (data_len < ETH_HLEN)
++ if (!pskb_may_pull(skb, ETH_HLEN))
+ goto error;
+
+ secpath_reset(skb);
--- /dev/null
+From cc50691e9451c5782d3e11caee24d6b29815f567 Mon Sep 17 00:00:00 2001
+From: Jianzhao Wang <jianzhao.wang@6wind.com>
+Date: Wed, 8 Sep 2010 14:35:43 -0700
+Subject: net: blackhole route should always be recalculated
+
+
+From: Jianzhao Wang <jianzhao.wang@6wind.com>
+
+[ Upstream commit ae2688d59b5f861dc70a091d003773975d2ae7fb ]
+
+Blackhole routes are used when xfrm_lookup() returns -EREMOTE (error
+triggered by IKE for example), hence this kind of route is always
+temporary and so we should check if a better route exists for next
+packets.
+Bug has been introduced by commit d11a4dc18bf41719c9f0d7ed494d295dd2973b92.
+
+Signed-off-by: Jianzhao Wang <jianzhao.wang@6wind.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/ipv4/route.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2741,6 +2741,11 @@ slow_output:
+
+ EXPORT_SYMBOL_GPL(__ip_route_output_key);
+
++static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
++{
++ return NULL;
++}
++
+ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+ {
+ }
+@@ -2749,7 +2754,7 @@ static struct dst_ops ipv4_dst_blackhole
+ .family = AF_INET,
+ .protocol = cpu_to_be16(ETH_P_IP),
+ .destroy = ipv4_dst_destroy,
+- .check = ipv4_dst_check,
++ .check = ipv4_blackhole_dst_check,
+ .update_pmtu = ipv4_rt_blackhole_update_pmtu,
+ .entries = ATOMIC_INIT(0),
+ };
--- /dev/null
+From b8342a5f5557f5037913bb9d5210cce98b0fdef4 Mon Sep 17 00:00:00 2001
+From: David S. Miller <davem@davemloft.net>
+Date: Tue, 14 Sep 2010 21:41:20 -0700
+Subject: net: RPS needs to depend upon USE_GENERIC_SMP_HELPERS
+
+
+From: David S. Miller <davem@davemloft.net>
+
+[ Upstream commit 6dcbc12290abb452a5e42713faa6461b248e2f55 ]
+
+You cannot invoke __smp_call_function_single() unless the
+architecture sets this symbol.
+
+Reported-by: Daniel Hellstrom <daniel@gaisler.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/Kconfig
++++ b/net/Kconfig
+@@ -206,7 +206,7 @@ source "net/dcb/Kconfig"
+
+ config RPS
+ boolean
+- depends on SMP && SYSFS
++ depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+ default y
+
+ menu "Network testing"
--- /dev/null
+From 7c70a5a1589a7ad110c15b0d80635cc925e84cb2 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Mon, 16 Aug 2010 03:25:00 +0000
+Subject: rds: fix a leak of kernel memory
+
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+[ Upstream commit f037590fff3005ce8a1513858d7d44f50053cc8f ]
+
+struct rds_rdma_notify contains a 32 bits hole on 64bit arches,
+make sure it is zeroed before copying it to user.
+
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+CC: Andy Grover <andy.grover@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/rds/recv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/rds/recv.c
++++ b/net/rds/recv.c
+@@ -297,7 +297,7 @@ static int rds_still_queued(struct rds_s
+ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
+ {
+ struct rds_notifier *notifier;
+- struct rds_rdma_notify cmsg;
++ struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */
+ unsigned int count = 0, max_messages = ~0U;
+ unsigned long flags;
+ LIST_HEAD(copy);
--- /dev/null
+From ccf013f24df45854a358c8c53f7fa87a39f795d0 Mon Sep 17 00:00:00 2001
+From: David S. Miller <davem@davemloft.net>
+Date: Mon, 23 Aug 2010 23:10:57 -0700
+Subject: sparc64: Get rid of indirect p1275 PROM call buffer.
+
+
+From: David S. Miller <davem@davemloft.net>
+
+[ Upstream commit 25edd6946a1d74e5e77813c2324a0908c68bcf9e ]
+
+This is based upon a report by Meelis Roos showing that it's possible
+that we'll try to fetch a property that is 32K in size with some
+devices. With the current fixed 3K buffer we use for moving data in
+and out of the firmware during PROM calls, that simply won't work.
+
+In fact, it will scramble random kernel data during bootup.
+
+The reasoning behind the temporary buffer is entirely historical. It
+used to be the case that we had problems referencing dynamic kernel
+memory (including the stack) early in the boot process before we
+explicitly told the firmware to switch us over to the kernel trap
+table.
+
+So what we did was always give the firmware buffers that were locked
+into the main kernel image.
+
+But we no longer have problems like that, so get rid of all of this
+indirect bounce buffering.
+
+Besides fixing Meelis's bug, this also makes the kernel data about 3K
+smaller.
+
+It was also discovered during these conversions that the
+implementation of prom_retain() was completely wrong, so that was
+fixed here as well. Currently that interface is not in use.
+
+Reported-by: Meelis Roos <mroos@linux.ee>
+Tested-by: Meelis Roos <mroos@linux.ee>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/sparc/include/asm/oplib_64.h | 27 ---
+ arch/sparc/prom/cif.S | 16 -
+ arch/sparc/prom/console_64.c | 48 ++++-
+ arch/sparc/prom/devops_64.c | 36 +++-
+ arch/sparc/prom/misc_64.c | 314 ++++++++++++++++++++++++++------------
+ arch/sparc/prom/p1275.c | 102 ------------
+ arch/sparc/prom/tree_64.c | 210 ++++++++++++++++++-------
+ 7 files changed, 456 insertions(+), 297 deletions(-)
+
+--- a/arch/sparc/include/asm/oplib_64.h
++++ b/arch/sparc/include/asm/oplib_64.h
+@@ -185,9 +185,8 @@ extern int prom_getunumber(int syndrome_
+ char *buf, int buflen);
+
+ /* Retain physical memory to the caller across soft resets. */
+-extern unsigned long prom_retain(const char *name,
+- unsigned long pa_low, unsigned long pa_high,
+- long size, long align);
++extern int prom_retain(const char *name, unsigned long size,
++ unsigned long align, unsigned long *paddr);
+
+ /* Load explicit I/D TLB entries into the calling processor. */
+ extern long prom_itlb_load(unsigned long index,
+@@ -287,26 +286,6 @@ extern void prom_sun4v_guest_soft_state(
+ extern int prom_ihandle2path(int handle, char *buffer, int bufsize);
+
+ /* Client interface level routines. */
+-extern long p1275_cmd(const char *, long, ...);
+-
+-#if 0
+-#define P1275_SIZE(x) ((((long)((x) / 32)) << 32) | (x))
+-#else
+-#define P1275_SIZE(x) x
+-#endif
+-
+-/* We support at most 16 input and 1 output argument */
+-#define P1275_ARG_NUMBER 0
+-#define P1275_ARG_IN_STRING 1
+-#define P1275_ARG_OUT_BUF 2
+-#define P1275_ARG_OUT_32B 3
+-#define P1275_ARG_IN_FUNCTION 4
+-#define P1275_ARG_IN_BUF 5
+-#define P1275_ARG_IN_64B 6
+-
+-#define P1275_IN(x) ((x) & 0xf)
+-#define P1275_OUT(x) (((x) << 4) & 0xf0)
+-#define P1275_INOUT(i,o) (P1275_IN(i)|P1275_OUT(o))
+-#define P1275_ARG(n,x) ((x) << ((n)*3 + 8))
++extern void p1275_cmd_direct(unsigned long *);
+
+ #endif /* !(__SPARC64_OPLIB_H) */
+--- a/arch/sparc/prom/cif.S
++++ b/arch/sparc/prom/cif.S
+@@ -9,18 +9,18 @@
+ #include <asm/thread_info.h>
+
+ .text
+- .globl prom_cif_interface
+-prom_cif_interface:
+- sethi %hi(p1275buf), %o0
+- or %o0, %lo(p1275buf), %o0
+- ldx [%o0 + 0x010], %o1 ! prom_cif_stack
+- save %o1, -192, %sp
+- ldx [%i0 + 0x008], %l2 ! prom_cif_handler
++ .globl prom_cif_direct
++prom_cif_direct:
++ sethi %hi(p1275buf), %o1
++ or %o1, %lo(p1275buf), %o1
++ ldx [%o1 + 0x0010], %o2 ! prom_cif_stack
++ save %o2, -192, %sp
++ ldx [%i1 + 0x0008], %l2 ! prom_cif_handler
+ mov %g4, %l0
+ mov %g5, %l1
+ mov %g6, %l3
+ call %l2
+- add %i0, 0x018, %o0 ! prom_args
++ mov %i0, %o0 ! prom_args
+ mov %l0, %g4
+ mov %l1, %g5
+ mov %l3, %g6
+--- a/arch/sparc/prom/console_64.c
++++ b/arch/sparc/prom/console_64.c
+@@ -21,14 +21,22 @@ extern int prom_stdin, prom_stdout;
+ inline int
+ prom_nbgetchar(void)
+ {
++ unsigned long args[7];
+ char inc;
+
+- if (p1275_cmd("read", P1275_ARG(1,P1275_ARG_OUT_BUF)|
+- P1275_INOUT(3,1),
+- prom_stdin, &inc, P1275_SIZE(1)) == 1)
++ args[0] = (unsigned long) "read";
++ args[1] = 3;
++ args[2] = 1;
++ args[3] = (unsigned int) prom_stdin;
++ args[4] = (unsigned long) &inc;
++ args[5] = 1;
++ args[6] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ if (args[6] == 1)
+ return inc;
+- else
+- return -1;
++ return -1;
+ }
+
+ /* Non blocking put character to console device, returns -1 if
+@@ -37,12 +45,22 @@ prom_nbgetchar(void)
+ inline int
+ prom_nbputchar(char c)
+ {
++ unsigned long args[7];
+ char outc;
+
+ outc = c;
+- if (p1275_cmd("write", P1275_ARG(1,P1275_ARG_IN_BUF)|
+- P1275_INOUT(3,1),
+- prom_stdout, &outc, P1275_SIZE(1)) == 1)
++
++ args[0] = (unsigned long) "write";
++ args[1] = 3;
++ args[2] = 1;
++ args[3] = (unsigned int) prom_stdout;
++ args[4] = (unsigned long) &outc;
++ args[5] = 1;
++ args[6] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ if (args[6] == 1)
+ return 0;
+ else
+ return -1;
+@@ -67,7 +85,15 @@ prom_putchar(char c)
+ void
+ prom_puts(const char *s, int len)
+ {
+- p1275_cmd("write", P1275_ARG(1,P1275_ARG_IN_BUF)|
+- P1275_INOUT(3,1),
+- prom_stdout, s, P1275_SIZE(len));
++ unsigned long args[7];
++
++ args[0] = (unsigned long) "write";
++ args[1] = 3;
++ args[2] = 1;
++ args[3] = (unsigned int) prom_stdout;
++ args[4] = (unsigned long) s;
++ args[5] = len;
++ args[6] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
+ }
+--- a/arch/sparc/prom/devops_64.c
++++ b/arch/sparc/prom/devops_64.c
+@@ -18,16 +18,32 @@
+ int
+ prom_devopen(const char *dstr)
+ {
+- return p1275_cmd ("open", P1275_ARG(0,P1275_ARG_IN_STRING)|
+- P1275_INOUT(1,1),
+- dstr);
++ unsigned long args[5];
++
++ args[0] = (unsigned long) "open";
++ args[1] = 1;
++ args[2] = 1;
++ args[3] = (unsigned long) dstr;
++ args[4] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ return (int) args[4];
+ }
+
+ /* Close the device described by device handle 'dhandle'. */
+ int
+ prom_devclose(int dhandle)
+ {
+- p1275_cmd ("close", P1275_INOUT(1,0), dhandle);
++ unsigned long args[4];
++
++ args[0] = (unsigned long) "close";
++ args[1] = 1;
++ args[2] = 0;
++ args[3] = (unsigned int) dhandle;
++
++ p1275_cmd_direct(args);
++
+ return 0;
+ }
+
+@@ -37,5 +53,15 @@ prom_devclose(int dhandle)
+ void
+ prom_seek(int dhandle, unsigned int seekhi, unsigned int seeklo)
+ {
+- p1275_cmd ("seek", P1275_INOUT(3,1), dhandle, seekhi, seeklo);
++ unsigned long args[7];
++
++ args[0] = (unsigned long) "seek";
++ args[1] = 3;
++ args[2] = 1;
++ args[3] = (unsigned int) dhandle;
++ args[4] = seekhi;
++ args[5] = seeklo;
++ args[6] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
+ }
+--- a/arch/sparc/prom/misc_64.c
++++ b/arch/sparc/prom/misc_64.c
+@@ -20,10 +20,17 @@
+
+ int prom_service_exists(const char *service_name)
+ {
+- int err = p1275_cmd("test", P1275_ARG(0, P1275_ARG_IN_STRING) |
+- P1275_INOUT(1, 1), service_name);
++ unsigned long args[5];
+
+- if (err)
++ args[0] = (unsigned long) "test";
++ args[1] = 1;
++ args[2] = 1;
++ args[3] = (unsigned long) service_name;
++ args[4] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ if (args[4])
+ return 0;
+ return 1;
+ }
+@@ -31,30 +38,47 @@ int prom_service_exists(const char *serv
+ void prom_sun4v_guest_soft_state(void)
+ {
+ const char *svc = "SUNW,soft-state-supported";
++ unsigned long args[3];
+
+ if (!prom_service_exists(svc))
+ return;
+- p1275_cmd(svc, P1275_INOUT(0, 0));
++ args[0] = (unsigned long) svc;
++ args[1] = 0;
++ args[2] = 0;
++ p1275_cmd_direct(args);
+ }
+
+ /* Reset and reboot the machine with the command 'bcommand'. */
+ void prom_reboot(const char *bcommand)
+ {
++ unsigned long args[4];
++
+ #ifdef CONFIG_SUN_LDOMS
+ if (ldom_domaining_enabled)
+ ldom_reboot(bcommand);
+ #endif
+- p1275_cmd("boot", P1275_ARG(0, P1275_ARG_IN_STRING) |
+- P1275_INOUT(1, 0), bcommand);
++ args[0] = (unsigned long) "boot";
++ args[1] = 1;
++ args[2] = 0;
++ args[3] = (unsigned long) bcommand;
++
++ p1275_cmd_direct(args);
+ }
+
+ /* Forth evaluate the expression contained in 'fstring'. */
+ void prom_feval(const char *fstring)
+ {
++ unsigned long args[5];
++
+ if (!fstring || fstring[0] == 0)
+ return;
+- p1275_cmd("interpret", P1275_ARG(0, P1275_ARG_IN_STRING) |
+- P1275_INOUT(1, 1), fstring);
++ args[0] = (unsigned long) "interpret";
++ args[1] = 1;
++ args[2] = 1;
++ args[3] = (unsigned long) fstring;
++ args[4] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
+ }
+ EXPORT_SYMBOL(prom_feval);
+
+@@ -68,6 +92,7 @@ extern void smp_release(void);
+ */
+ void prom_cmdline(void)
+ {
++ unsigned long args[3];
+ unsigned long flags;
+
+ local_irq_save(flags);
+@@ -76,7 +101,11 @@ void prom_cmdline(void)
+ smp_capture();
+ #endif
+
+- p1275_cmd("enter", P1275_INOUT(0, 0));
++ args[0] = (unsigned long) "enter";
++ args[1] = 0;
++ args[2] = 0;
++
++ p1275_cmd_direct(args);
+
+ #ifdef CONFIG_SMP
+ smp_release();
+@@ -90,22 +119,32 @@ void prom_cmdline(void)
+ */
+ void notrace prom_halt(void)
+ {
++ unsigned long args[3];
++
+ #ifdef CONFIG_SUN_LDOMS
+ if (ldom_domaining_enabled)
+ ldom_power_off();
+ #endif
+ again:
+- p1275_cmd("exit", P1275_INOUT(0, 0));
++ args[0] = (unsigned long) "exit";
++ args[1] = 0;
++ args[2] = 0;
++ p1275_cmd_direct(args);
+ goto again; /* PROM is out to get me -DaveM */
+ }
+
+ void prom_halt_power_off(void)
+ {
++ unsigned long args[3];
++
+ #ifdef CONFIG_SUN_LDOMS
+ if (ldom_domaining_enabled)
+ ldom_power_off();
+ #endif
+- p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0));
++ args[0] = (unsigned long) "SUNW,power-off";
++ args[1] = 0;
++ args[2] = 0;
++ p1275_cmd_direct(args);
+
+ /* if nothing else helps, we just halt */
+ prom_halt();
+@@ -114,10 +153,15 @@ void prom_halt_power_off(void)
+ /* Set prom sync handler to call function 'funcp'. */
+ void prom_setcallback(callback_func_t funcp)
+ {
++ unsigned long args[5];
+ if (!funcp)
+ return;
+- p1275_cmd("set-callback", P1275_ARG(0, P1275_ARG_IN_FUNCTION) |
+- P1275_INOUT(1, 1), funcp);
++ args[0] = (unsigned long) "set-callback";
++ args[1] = 1;
++ args[2] = 1;
++ args[3] = (unsigned long) funcp;
++ args[4] = (unsigned long) -1;
++ p1275_cmd_direct(args);
+ }
+
+ /* Get the idprom and stuff it into buffer 'idbuf'. Returns the
+@@ -173,57 +217,61 @@ static int prom_get_memory_ihandle(void)
+ }
+
+ /* Load explicit I/D TLB entries. */
++static long tlb_load(const char *type, unsigned long index,
++ unsigned long tte_data, unsigned long vaddr)
++{
++ unsigned long args[9];
++
++ args[0] = (unsigned long) prom_callmethod_name;
++ args[1] = 5;
++ args[2] = 1;
++ args[3] = (unsigned long) type;
++ args[4] = (unsigned int) prom_get_mmu_ihandle();
++ args[5] = vaddr;
++ args[6] = tte_data;
++ args[7] = index;
++ args[8] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ return (long) args[8];
++}
++
+ long prom_itlb_load(unsigned long index,
+ unsigned long tte_data,
+ unsigned long vaddr)
+ {
+- return p1275_cmd(prom_callmethod_name,
+- (P1275_ARG(0, P1275_ARG_IN_STRING) |
+- P1275_ARG(2, P1275_ARG_IN_64B) |
+- P1275_ARG(3, P1275_ARG_IN_64B) |
+- P1275_INOUT(5, 1)),
+- "SUNW,itlb-load",
+- prom_get_mmu_ihandle(),
+- /* And then our actual args are pushed backwards. */
+- vaddr,
+- tte_data,
+- index);
++ return tlb_load("SUNW,itlb-load", index, tte_data, vaddr);
+ }
+
+ long prom_dtlb_load(unsigned long index,
+ unsigned long tte_data,
+ unsigned long vaddr)
+ {
+- return p1275_cmd(prom_callmethod_name,
+- (P1275_ARG(0, P1275_ARG_IN_STRING) |
+- P1275_ARG(2, P1275_ARG_IN_64B) |
+- P1275_ARG(3, P1275_ARG_IN_64B) |
+- P1275_INOUT(5, 1)),
+- "SUNW,dtlb-load",
+- prom_get_mmu_ihandle(),
+- /* And then our actual args are pushed backwards. */
+- vaddr,
+- tte_data,
+- index);
++ return tlb_load("SUNW,dtlb-load", index, tte_data, vaddr);
+ }
+
+ int prom_map(int mode, unsigned long size,
+ unsigned long vaddr, unsigned long paddr)
+ {
+- int ret = p1275_cmd(prom_callmethod_name,
+- (P1275_ARG(0, P1275_ARG_IN_STRING) |
+- P1275_ARG(3, P1275_ARG_IN_64B) |
+- P1275_ARG(4, P1275_ARG_IN_64B) |
+- P1275_ARG(6, P1275_ARG_IN_64B) |
+- P1275_INOUT(7, 1)),
+- prom_map_name,
+- prom_get_mmu_ihandle(),
+- mode,
+- size,
+- vaddr,
+- 0,
+- paddr);
++ unsigned long args[11];
++ int ret;
+
++ args[0] = (unsigned long) prom_callmethod_name;
++ args[1] = 7;
++ args[2] = 1;
++ args[3] = (unsigned long) prom_map_name;
++ args[4] = (unsigned int) prom_get_mmu_ihandle();
++ args[5] = (unsigned int) mode;
++ args[6] = size;
++ args[7] = vaddr;
++ args[8] = 0;
++ args[9] = paddr;
++ args[10] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ ret = (int) args[10];
+ if (ret == 0)
+ ret = -1;
+ return ret;
+@@ -231,40 +279,51 @@ int prom_map(int mode, unsigned long siz
+
+ void prom_unmap(unsigned long size, unsigned long vaddr)
+ {
+- p1275_cmd(prom_callmethod_name,
+- (P1275_ARG(0, P1275_ARG_IN_STRING) |
+- P1275_ARG(2, P1275_ARG_IN_64B) |
+- P1275_ARG(3, P1275_ARG_IN_64B) |
+- P1275_INOUT(4, 0)),
+- prom_unmap_name,
+- prom_get_mmu_ihandle(),
+- size,
+- vaddr);
++ unsigned long args[7];
++
++ args[0] = (unsigned long) prom_callmethod_name;
++ args[1] = 4;
++ args[2] = 0;
++ args[3] = (unsigned long) prom_unmap_name;
++ args[4] = (unsigned int) prom_get_mmu_ihandle();
++ args[5] = size;
++ args[6] = vaddr;
++
++ p1275_cmd_direct(args);
+ }
+
+ /* Set aside physical memory which is not touched or modified
+ * across soft resets.
+ */
+-unsigned long prom_retain(const char *name,
+- unsigned long pa_low, unsigned long pa_high,
+- long size, long align)
+-{
+- /* XXX I don't think we return multiple values correctly.
+- * XXX OBP supposedly returns pa_low/pa_high here, how does
+- * XXX it work?
+- */
++int prom_retain(const char *name, unsigned long size,
++ unsigned long align, unsigned long *paddr)
++{
++ unsigned long args[11];
+
+- /* If align is zero, the pa_low/pa_high args are passed,
+- * else they are not.
++ args[0] = (unsigned long) prom_callmethod_name;
++ args[1] = 5;
++ args[2] = 3;
++ args[3] = (unsigned long) "SUNW,retain";
++ args[4] = (unsigned int) prom_get_memory_ihandle();
++ args[5] = align;
++ args[6] = size;
++ args[7] = (unsigned long) name;
++ args[8] = (unsigned long) -1;
++ args[9] = (unsigned long) -1;
++ args[10] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ if (args[8])
++ return (int) args[8];
++
++ /* Next we get "phys_high" then "phys_low". On 64-bit
++ * the phys_high cell is don't care since the phys_low
++ * cell has the full value.
+ */
+- if (align == 0)
+- return p1275_cmd("SUNW,retain",
+- (P1275_ARG(0, P1275_ARG_IN_BUF) | P1275_INOUT(5, 2)),
+- name, pa_low, pa_high, size, align);
+- else
+- return p1275_cmd("SUNW,retain",
+- (P1275_ARG(0, P1275_ARG_IN_BUF) | P1275_INOUT(3, 2)),
+- name, size, align);
++ *paddr = args[10];
++
++ return 0;
+ }
+
+ /* Get "Unumber" string for the SIMM at the given
+@@ -277,62 +336,129 @@ int prom_getunumber(int syndrome_code,
+ unsigned long phys_addr,
+ char *buf, int buflen)
+ {
+- return p1275_cmd(prom_callmethod_name,
+- (P1275_ARG(0, P1275_ARG_IN_STRING) |
+- P1275_ARG(3, P1275_ARG_OUT_BUF) |
+- P1275_ARG(6, P1275_ARG_IN_64B) |
+- P1275_INOUT(8, 2)),
+- "SUNW,get-unumber", prom_get_memory_ihandle(),
+- buflen, buf, P1275_SIZE(buflen),
+- 0, phys_addr, syndrome_code);
++ unsigned long args[12];
++
++ args[0] = (unsigned long) prom_callmethod_name;
++ args[1] = 7;
++ args[2] = 2;
++ args[3] = (unsigned long) "SUNW,get-unumber";
++ args[4] = (unsigned int) prom_get_memory_ihandle();
++ args[5] = buflen;
++ args[6] = (unsigned long) buf;
++ args[7] = 0;
++ args[8] = phys_addr;
++ args[9] = (unsigned int) syndrome_code;
++ args[10] = (unsigned long) -1;
++ args[11] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ return (int) args[10];
+ }
+
+ /* Power management extensions. */
+ void prom_sleepself(void)
+ {
+- p1275_cmd("SUNW,sleep-self", P1275_INOUT(0, 0));
++ unsigned long args[3];
++
++ args[0] = (unsigned long) "SUNW,sleep-self";
++ args[1] = 0;
++ args[2] = 0;
++ p1275_cmd_direct(args);
+ }
+
+ int prom_sleepsystem(void)
+ {
+- return p1275_cmd("SUNW,sleep-system", P1275_INOUT(0, 1));
++ unsigned long args[4];
++
++ args[0] = (unsigned long) "SUNW,sleep-system";
++ args[1] = 0;
++ args[2] = 1;
++ args[3] = (unsigned long) -1;
++ p1275_cmd_direct(args);
++
++ return (int) args[3];
+ }
+
+ int prom_wakeupsystem(void)
+ {
+- return p1275_cmd("SUNW,wakeup-system", P1275_INOUT(0, 1));
++ unsigned long args[4];
++
++ args[0] = (unsigned long) "SUNW,wakeup-system";
++ args[1] = 0;
++ args[2] = 1;
++ args[3] = (unsigned long) -1;
++ p1275_cmd_direct(args);
++
++ return (int) args[3];
+ }
+
+ #ifdef CONFIG_SMP
+ void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg)
+ {
+- p1275_cmd("SUNW,start-cpu", P1275_INOUT(3, 0), cpunode, pc, arg);
++ unsigned long args[6];
++
++ args[0] = (unsigned long) "SUNW,start-cpu";
++ args[1] = 3;
++ args[2] = 0;
++ args[3] = (unsigned int) cpunode;
++ args[4] = pc;
++ args[5] = arg;
++ p1275_cmd_direct(args);
+ }
+
+ void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg)
+ {
+- p1275_cmd("SUNW,start-cpu-by-cpuid", P1275_INOUT(3, 0),
+- cpuid, pc, arg);
++ unsigned long args[6];
++
++ args[0] = (unsigned long) "SUNW,start-cpu-by-cpuid";
++ args[1] = 3;
++ args[2] = 0;
++ args[3] = (unsigned int) cpuid;
++ args[4] = pc;
++ args[5] = arg;
++ p1275_cmd_direct(args);
+ }
+
+ void prom_stopcpu_cpuid(int cpuid)
+ {
+- p1275_cmd("SUNW,stop-cpu-by-cpuid", P1275_INOUT(1, 0),
+- cpuid);
++ unsigned long args[4];
++
++ args[0] = (unsigned long) "SUNW,stop-cpu-by-cpuid";
++ args[1] = 1;
++ args[2] = 0;
++ args[3] = (unsigned int) cpuid;
++ p1275_cmd_direct(args);
+ }
+
+ void prom_stopself(void)
+ {
+- p1275_cmd("SUNW,stop-self", P1275_INOUT(0, 0));
++ unsigned long args[3];
++
++ args[0] = (unsigned long) "SUNW,stop-self";
++ args[1] = 0;
++ args[2] = 0;
++ p1275_cmd_direct(args);
+ }
+
+ void prom_idleself(void)
+ {
+- p1275_cmd("SUNW,idle-self", P1275_INOUT(0, 0));
++ unsigned long args[3];
++
++ args[0] = (unsigned long) "SUNW,idle-self";
++ args[1] = 0;
++ args[2] = 0;
++ p1275_cmd_direct(args);
+ }
+
+ void prom_resumecpu(int cpunode)
+ {
+- p1275_cmd("SUNW,resume-cpu", P1275_INOUT(1, 0), cpunode);
++ unsigned long args[4];
++
++ args[0] = (unsigned long) "SUNW,resume-cpu";
++ args[1] = 1;
++ args[2] = 0;
++ args[3] = (unsigned int) cpunode;
++ p1275_cmd_direct(args);
+ }
+ #endif
+--- a/arch/sparc/prom/p1275.c
++++ b/arch/sparc/prom/p1275.c
+@@ -22,13 +22,11 @@ struct {
+ long prom_callback; /* 0x00 */
+ void (*prom_cif_handler)(long *); /* 0x08 */
+ unsigned long prom_cif_stack; /* 0x10 */
+- unsigned long prom_args [23]; /* 0x18 */
+- char prom_buffer [3000];
+ } p1275buf;
+
+ extern void prom_world(int);
+
+-extern void prom_cif_interface(void);
++extern void prom_cif_direct(unsigned long *args);
+ extern void prom_cif_callback(void);
+
+ /*
+@@ -36,114 +34,20 @@ extern void prom_cif_callback(void);
+ */
+ DEFINE_RAW_SPINLOCK(prom_entry_lock);
+
+-long p1275_cmd(const char *service, long fmt, ...)
++void p1275_cmd_direct(unsigned long *args)
+ {
+- char *p, *q;
+ unsigned long flags;
+- int nargs, nrets, i;
+- va_list list;
+- long attrs, x;
+-
+- p = p1275buf.prom_buffer;
+
+ raw_local_save_flags(flags);
+ raw_local_irq_restore(PIL_NMI);
+ raw_spin_lock(&prom_entry_lock);
+
+- p1275buf.prom_args[0] = (unsigned long)p; /* service */
+- strcpy (p, service);
+- p = (char *)(((long)(strchr (p, 0) + 8)) & ~7);
+- p1275buf.prom_args[1] = nargs = (fmt & 0x0f); /* nargs */
+- p1275buf.prom_args[2] = nrets = ((fmt & 0xf0) >> 4); /* nrets */
+- attrs = fmt >> 8;
+- va_start(list, fmt);
+- for (i = 0; i < nargs; i++, attrs >>= 3) {
+- switch (attrs & 0x7) {
+- case P1275_ARG_NUMBER:
+- p1275buf.prom_args[i + 3] =
+- (unsigned)va_arg(list, long);
+- break;
+- case P1275_ARG_IN_64B:
+- p1275buf.prom_args[i + 3] =
+- va_arg(list, unsigned long);
+- break;
+- case P1275_ARG_IN_STRING:
+- strcpy (p, va_arg(list, char *));
+- p1275buf.prom_args[i + 3] = (unsigned long)p;
+- p = (char *)(((long)(strchr (p, 0) + 8)) & ~7);
+- break;
+- case P1275_ARG_OUT_BUF:
+- (void) va_arg(list, char *);
+- p1275buf.prom_args[i + 3] = (unsigned long)p;
+- x = va_arg(list, long);
+- i++; attrs >>= 3;
+- p = (char *)(((long)(p + (int)x + 7)) & ~7);
+- p1275buf.prom_args[i + 3] = x;
+- break;
+- case P1275_ARG_IN_BUF:
+- q = va_arg(list, char *);
+- p1275buf.prom_args[i + 3] = (unsigned long)p;
+- x = va_arg(list, long);
+- i++; attrs >>= 3;
+- memcpy (p, q, (int)x);
+- p = (char *)(((long)(p + (int)x + 7)) & ~7);
+- p1275buf.prom_args[i + 3] = x;
+- break;
+- case P1275_ARG_OUT_32B:
+- (void) va_arg(list, char *);
+- p1275buf.prom_args[i + 3] = (unsigned long)p;
+- p += 32;
+- break;
+- case P1275_ARG_IN_FUNCTION:
+- p1275buf.prom_args[i + 3] =
+- (unsigned long)prom_cif_callback;
+- p1275buf.prom_callback = va_arg(list, long);
+- break;
+- }
+- }
+- va_end(list);
+-
+ prom_world(1);
+- prom_cif_interface();
++ prom_cif_direct(args);
+ prom_world(0);
+
+- attrs = fmt >> 8;
+- va_start(list, fmt);
+- for (i = 0; i < nargs; i++, attrs >>= 3) {
+- switch (attrs & 0x7) {
+- case P1275_ARG_NUMBER:
+- (void) va_arg(list, long);
+- break;
+- case P1275_ARG_IN_STRING:
+- (void) va_arg(list, char *);
+- break;
+- case P1275_ARG_IN_FUNCTION:
+- (void) va_arg(list, long);
+- break;
+- case P1275_ARG_IN_BUF:
+- (void) va_arg(list, char *);
+- (void) va_arg(list, long);
+- i++; attrs >>= 3;
+- break;
+- case P1275_ARG_OUT_BUF:
+- p = va_arg(list, char *);
+- x = va_arg(list, long);
+- memcpy (p, (char *)(p1275buf.prom_args[i + 3]), (int)x);
+- i++; attrs >>= 3;
+- break;
+- case P1275_ARG_OUT_32B:
+- p = va_arg(list, char *);
+- memcpy (p, (char *)(p1275buf.prom_args[i + 3]), 32);
+- break;
+- }
+- }
+- va_end(list);
+- x = p1275buf.prom_args [nargs + 3];
+-
+ raw_spin_unlock(&prom_entry_lock);
+ raw_local_irq_restore(flags);
+-
+- return x;
+ }
+
+ void prom_cif_init(void *cif_handler, void *cif_stack)
+--- a/arch/sparc/prom/tree_64.c
++++ b/arch/sparc/prom/tree_64.c
+@@ -16,22 +16,39 @@
+ #include <asm/oplib.h>
+ #include <asm/ldc.h>
+
++static int prom_node_to_node(const char *type, int node)
++{
++ unsigned long args[5];
++
++ args[0] = (unsigned long) type;
++ args[1] = 1;
++ args[2] = 1;
++ args[3] = (unsigned int) node;
++ args[4] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ return (int) args[4];
++}
++
+ /* Return the child of node 'node' or zero if no this node has no
+ * direct descendent.
+ */
+ inline int __prom_getchild(int node)
+ {
+- return p1275_cmd ("child", P1275_INOUT(1, 1), node);
++ return prom_node_to_node("child", node);
+ }
+
+ inline int prom_getchild(int node)
+ {
+ int cnode;
+
+- if(node == -1) return 0;
++ if (node == -1)
++ return 0;
+ cnode = __prom_getchild(node);
+- if(cnode == -1) return 0;
+- return (int)cnode;
++ if (cnode == -1)
++ return 0;
++ return cnode;
+ }
+ EXPORT_SYMBOL(prom_getchild);
+
+@@ -39,10 +56,12 @@ inline int prom_getparent(int node)
+ {
+ int cnode;
+
+- if(node == -1) return 0;
+- cnode = p1275_cmd ("parent", P1275_INOUT(1, 1), node);
+- if(cnode == -1) return 0;
+- return (int)cnode;
++ if (node == -1)
++ return 0;
++ cnode = prom_node_to_node("parent", node);
++ if (cnode == -1)
++ return 0;
++ return cnode;
+ }
+
+ /* Return the next sibling of node 'node' or zero if no more siblings
+@@ -50,7 +69,7 @@ inline int prom_getparent(int node)
+ */
+ inline int __prom_getsibling(int node)
+ {
+- return p1275_cmd(prom_peer_name, P1275_INOUT(1, 1), node);
++ return prom_node_to_node(prom_peer_name, node);
+ }
+
+ inline int prom_getsibling(int node)
+@@ -72,11 +91,21 @@ EXPORT_SYMBOL(prom_getsibling);
+ */
+ inline int prom_getproplen(int node, const char *prop)
+ {
+- if((!node) || (!prop)) return -1;
+- return p1275_cmd ("getproplen",
+- P1275_ARG(1,P1275_ARG_IN_STRING)|
+- P1275_INOUT(2, 1),
+- node, prop);
++ unsigned long args[6];
++
++ if (!node || !prop)
++ return -1;
++
++ args[0] = (unsigned long) "getproplen";
++ args[1] = 2;
++ args[2] = 1;
++ args[3] = (unsigned int) node;
++ args[4] = (unsigned long) prop;
++ args[5] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ return (int) args[5];
+ }
+ EXPORT_SYMBOL(prom_getproplen);
+
+@@ -87,19 +116,25 @@ EXPORT_SYMBOL(prom_getproplen);
+ inline int prom_getproperty(int node, const char *prop,
+ char *buffer, int bufsize)
+ {
++ unsigned long args[8];
+ int plen;
+
+ plen = prom_getproplen(node, prop);
+- if ((plen > bufsize) || (plen == 0) || (plen == -1)) {
++ if ((plen > bufsize) || (plen == 0) || (plen == -1))
+ return -1;
+- } else {
+- /* Ok, things seem all right. */
+- return p1275_cmd(prom_getprop_name,
+- P1275_ARG(1,P1275_ARG_IN_STRING)|
+- P1275_ARG(2,P1275_ARG_OUT_BUF)|
+- P1275_INOUT(4, 1),
+- node, prop, buffer, P1275_SIZE(plen));
+- }
++
++ args[0] = (unsigned long) prom_getprop_name;
++ args[1] = 4;
++ args[2] = 1;
++ args[3] = (unsigned int) node;
++ args[4] = (unsigned long) prop;
++ args[5] = (unsigned long) buffer;
++ args[6] = bufsize;
++ args[7] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ return (int) args[7];
+ }
+ EXPORT_SYMBOL(prom_getproperty);
+
+@@ -110,7 +145,7 @@ inline int prom_getint(int node, const c
+ {
+ int intprop;
+
+- if(prom_getproperty(node, prop, (char *) &intprop, sizeof(int)) != -1)
++ if (prom_getproperty(node, prop, (char *) &intprop, sizeof(int)) != -1)
+ return intprop;
+
+ return -1;
+@@ -126,7 +161,8 @@ int prom_getintdefault(int node, const c
+ int retval;
+
+ retval = prom_getint(node, property);
+- if(retval == -1) return deflt;
++ if (retval == -1)
++ return deflt;
+
+ return retval;
+ }
+@@ -138,7 +174,8 @@ int prom_getbool(int node, const char *p
+ int retval;
+
+ retval = prom_getproplen(node, prop);
+- if(retval == -1) return 0;
++ if (retval == -1)
++ return 0;
+ return 1;
+ }
+ EXPORT_SYMBOL(prom_getbool);
+@@ -152,7 +189,8 @@ void prom_getstring(int node, const char
+ int len;
+
+ len = prom_getproperty(node, prop, user_buf, ubuf_size);
+- if(len != -1) return;
++ if (len != -1)
++ return;
+ user_buf[0] = 0;
+ }
+ EXPORT_SYMBOL(prom_getstring);
+@@ -164,7 +202,8 @@ int prom_nodematch(int node, const char
+ {
+ char namebuf[128];
+ prom_getproperty(node, "name", namebuf, sizeof(namebuf));
+- if(strcmp(namebuf, name) == 0) return 1;
++ if (strcmp(namebuf, name) == 0)
++ return 1;
+ return 0;
+ }
+
+@@ -190,16 +229,29 @@ int prom_searchsiblings(int node_start,
+ }
+ EXPORT_SYMBOL(prom_searchsiblings);
+
++static const char *prom_nextprop_name = "nextprop";
++
+ /* Return the first property type for node 'node'.
+ * buffer should be at least 32B in length
+ */
+ inline char *prom_firstprop(int node, char *buffer)
+ {
++ unsigned long args[7];
++
+ *buffer = 0;
+- if(node == -1) return buffer;
+- p1275_cmd ("nextprop", P1275_ARG(2,P1275_ARG_OUT_32B)|
+- P1275_INOUT(3, 0),
+- node, (char *) 0x0, buffer);
++ if (node == -1)
++ return buffer;
++
++ args[0] = (unsigned long) prom_nextprop_name;
++ args[1] = 3;
++ args[2] = 1;
++ args[3] = (unsigned int) node;
++ args[4] = 0;
++ args[5] = (unsigned long) buffer;
++ args[6] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
+ return buffer;
+ }
+ EXPORT_SYMBOL(prom_firstprop);
+@@ -210,9 +262,10 @@ EXPORT_SYMBOL(prom_firstprop);
+ */
+ inline char *prom_nextprop(int node, const char *oprop, char *buffer)
+ {
++ unsigned long args[7];
+ char buf[32];
+
+- if(node == -1) {
++ if (node == -1) {
+ *buffer = 0;
+ return buffer;
+ }
+@@ -220,10 +273,17 @@ inline char *prom_nextprop(int node, con
+ strcpy (buf, oprop);
+ oprop = buf;
+ }
+- p1275_cmd ("nextprop", P1275_ARG(1,P1275_ARG_IN_STRING)|
+- P1275_ARG(2,P1275_ARG_OUT_32B)|
+- P1275_INOUT(3, 0),
+- node, oprop, buffer);
++
++ args[0] = (unsigned long) prom_nextprop_name;
++ args[1] = 3;
++ args[2] = 1;
++ args[3] = (unsigned int) node;
++ args[4] = (unsigned long) oprop;
++ args[5] = (unsigned long) buffer;
++ args[6] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
+ return buffer;
+ }
+ EXPORT_SYMBOL(prom_nextprop);
+@@ -231,12 +291,19 @@ EXPORT_SYMBOL(prom_nextprop);
+ int
+ prom_finddevice(const char *name)
+ {
++ unsigned long args[5];
++
+ if (!name)
+ return 0;
+- return p1275_cmd(prom_finddev_name,
+- P1275_ARG(0,P1275_ARG_IN_STRING)|
+- P1275_INOUT(1, 1),
+- name);
++ args[0] = (unsigned long) "finddevice";
++ args[1] = 1;
++ args[2] = 1;
++ args[3] = (unsigned long) name;
++ args[4] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ return (int) args[4];
+ }
+ EXPORT_SYMBOL(prom_finddevice);
+
+@@ -247,7 +314,7 @@ int prom_node_has_property(int node, con
+ *buf = 0;
+ do {
+ prom_nextprop(node, buf, buf);
+- if(!strcmp(buf, prop))
++ if (!strcmp(buf, prop))
+ return 1;
+ } while (*buf);
+ return 0;
+@@ -260,6 +327,8 @@ EXPORT_SYMBOL(prom_node_has_property);
+ int
+ prom_setprop(int node, const char *pname, char *value, int size)
+ {
++ unsigned long args[8];
++
+ if (size == 0)
+ return 0;
+ if ((pname == 0) || (value == 0))
+@@ -271,19 +340,37 @@ prom_setprop(int node, const char *pname
+ return 0;
+ }
+ #endif
+- return p1275_cmd ("setprop", P1275_ARG(1,P1275_ARG_IN_STRING)|
+- P1275_ARG(2,P1275_ARG_IN_BUF)|
+- P1275_INOUT(4, 1),
+- node, pname, value, P1275_SIZE(size));
++ args[0] = (unsigned long) "setprop";
++ args[1] = 4;
++ args[2] = 1;
++ args[3] = (unsigned int) node;
++ args[4] = (unsigned long) pname;
++ args[5] = (unsigned long) value;
++ args[6] = size;
++ args[7] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ return (int) args[7];
+ }
+ EXPORT_SYMBOL(prom_setprop);
+
+ inline int prom_inst2pkg(int inst)
+ {
++ unsigned long args[5];
+ int node;
+
+- node = p1275_cmd ("instance-to-package", P1275_INOUT(1, 1), inst);
+- if (node == -1) return 0;
++ args[0] = (unsigned long) "instance-to-package";
++ args[1] = 1;
++ args[2] = 1;
++ args[3] = (unsigned int) inst;
++ args[4] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ node = (int) args[4];
++ if (node == -1)
++ return 0;
+ return node;
+ }
+
+@@ -296,17 +383,28 @@ prom_pathtoinode(const char *path)
+ int node, inst;
+
+ inst = prom_devopen (path);
+- if (inst == 0) return 0;
+- node = prom_inst2pkg (inst);
+- prom_devclose (inst);
+- if (node == -1) return 0;
++ if (inst == 0)
++ return 0;
++ node = prom_inst2pkg(inst);
++ prom_devclose(inst);
++ if (node == -1)
++ return 0;
+ return node;
+ }
+
+ int prom_ihandle2path(int handle, char *buffer, int bufsize)
+ {
+- return p1275_cmd("instance-to-path",
+- P1275_ARG(1,P1275_ARG_OUT_BUF)|
+- P1275_INOUT(3, 1),
+- handle, buffer, P1275_SIZE(bufsize));
++ unsigned long args[7];
++
++ args[0] = (unsigned long) "instance-to-path";
++ args[1] = 3;
++ args[2] = 1;
++ args[3] = (unsigned int) handle;
++ args[4] = (unsigned long) buffer;
++ args[5] = bufsize;
++ args[6] = (unsigned long) -1;
++
++ p1275_cmd_direct(args);
++
++ return (int) args[6];
+ }
--- /dev/null
+From 9e6ade922d7ff8240fa791f8b6fd50701b01998c Mon Sep 17 00:00:00 2001
+From: David S. Miller <davem@davemloft.net>
+Date: Wed, 25 Aug 2010 02:27:49 -0700
+Subject: tcp: Combat per-cpu skew in orphan tests.
+
+
+From: David S. Miller <davem@davemloft.net>
+
+[ Upstream commit ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 ]
+
+As reported by Anton Blanchard when we use
+percpu_counter_read_positive() to make our orphan socket limit checks,
+the check can be off by up to num_cpus_online() * batch (which is 32
+by default) which on a 128 cpu machine can be as large as the default
+orphan limit itself.
+
+Fix this by doing the full expensive sum check if the optimized check
+triggers.
+
+Reported-by: Anton Blanchard <anton@samba.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/net/tcp.h | 18 ++++++++++++++----
+ net/ipv4/tcp.c | 5 +----
+ net/ipv4/tcp_timer.c | 8 ++++----
+ 3 files changed, 19 insertions(+), 12 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -268,11 +268,21 @@ static inline int between(__u32 seq1, __
+ return seq3 - seq2 >= seq1 - seq2;
+ }
+
+-static inline int tcp_too_many_orphans(struct sock *sk, int num)
++static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
+ {
+- return (num > sysctl_tcp_max_orphans) ||
+- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]);
++ struct percpu_counter *ocp = sk->sk_prot->orphan_count;
++ int orphans = percpu_counter_read_positive(ocp);
++
++ if (orphans << shift > sysctl_tcp_max_orphans) {
++ orphans = percpu_counter_sum_positive(ocp);
++ if (orphans << shift > sysctl_tcp_max_orphans)
++ return true;
++ }
++
++ if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
++ atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])
++ return true;
++ return false;
+ }
+
+ /* syncookies: remember time of last synqueue overflow */
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2002,11 +2002,8 @@ adjudge_to_death:
+ }
+ }
+ if (sk->sk_state != TCP_CLOSE) {
+- int orphan_count = percpu_counter_read_positive(
+- sk->sk_prot->orphan_count);
+-
+ sk_mem_reclaim(sk);
+- if (tcp_too_many_orphans(sk, orphan_count)) {
++ if (tcp_too_many_orphans(sk, 0)) {
+ if (net_ratelimit())
+ printk(KERN_INFO "TCP: too many of orphaned "
+ "sockets\n");
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -67,18 +67,18 @@ static void tcp_write_err(struct sock *s
+ static int tcp_out_of_resources(struct sock *sk, int do_reset)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+- int orphans = percpu_counter_read_positive(&tcp_orphan_count);
++ int shift = 0;
+
+ /* If peer does not open window for long time, or did not transmit
+ * anything for long time, penalize it. */
+ if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
+- orphans <<= 1;
++ shift++;
+
+ /* If some dubious ICMP arrived, penalize even more. */
+ if (sk->sk_err_soft)
+- orphans <<= 1;
++ shift++;
+
+- if (tcp_too_many_orphans(sk, orphans)) {
++ if (tcp_too_many_orphans(sk, shift)) {
+ if (net_ratelimit())
+ printk(KERN_INFO "Out of socket memory\n");
+
--- /dev/null
+From 2b40c537e4e84747f7485b8cb13b06c0061c91c0 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Wed, 25 Aug 2010 23:02:17 -0700
+Subject: tcp: fix three tcp sysctls tuning
+
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+[ Upstream commit c5ed63d66f24fd4f7089b5a6e087b0ce7202aa8e ]
+
+As discovered by Anton Blanchard, current code to autotune
+tcp_death_row.sysctl_max_tw_buckets, sysctl_tcp_max_orphans and
+sysctl_max_syn_backlog makes little sense.
+
+The bigger a page is, the smaller tcp_max_orphans is: 4096 on a 512GB
+machine in Anton's case.
+
+(tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket))
+is much bigger if spinlock debugging is on. It's wrong to select bigger
+limits in this case (where kernel structures are also bigger).
+
+bhash_size max is 65536, and we get this value even for small machines.
+
+A better ground is to use size of ehash table, this also makes code
+shorter and more obvious.
+
+Based on a patch from Anton, and another from David.
+
+Reported-and-tested-by: Anton Blanchard <anton@samba.org>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/ipv4/tcp.c | 24 +++++++-----------------
+ 1 file changed, 7 insertions(+), 17 deletions(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3193,7 +3193,7 @@ void __init tcp_init(void)
+ {
+ struct sk_buff *skb = NULL;
+ unsigned long nr_pages, limit;
+- int order, i, max_share;
++ int i, max_share, cnt;
+ unsigned long jiffy = jiffies;
+
+ BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
+@@ -3242,22 +3242,12 @@ void __init tcp_init(void)
+ INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
+ }
+
+- /* Try to be a bit smarter and adjust defaults depending
+- * on available memory.
+- */
+- for (order = 0; ((1 << order) << PAGE_SHIFT) <
+- (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket));
+- order++)
+- ;
+- if (order >= 4) {
+- tcp_death_row.sysctl_max_tw_buckets = 180000;
+- sysctl_tcp_max_orphans = 4096 << (order - 4);
+- sysctl_max_syn_backlog = 1024;
+- } else if (order < 3) {
+- tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
+- sysctl_tcp_max_orphans >>= (3 - order);
+- sysctl_max_syn_backlog = 128;
+- }
++
++ cnt = tcp_hashinfo.ehash_mask + 1;
++
++ tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
++ sysctl_tcp_max_orphans = cnt / 2;
++ sysctl_max_syn_backlog = max(128, cnt / 256);
+
+ /* Set the pressure threshold to be a fraction of global memory that
+ * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
--- /dev/null
+From 945c01a93b3fea8a0d8a837fb98ff0ec6613207a Mon Sep 17 00:00:00 2001
+From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+Date: Wed, 15 Sep 2010 10:27:52 -0700
+Subject: tcp: Prevent overzealous packetization by SWS logic.
+
+From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+
+[ Upstream commit 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 ]
+
+If peer uses tiny MSS (say, 75 bytes) and similarly tiny advertised
+window, the SWS logic will packetize to half the MSS unnecessarily.
+
+This causes problems with some embedded devices.
+
+However for large MSS devices we do want to half-MSS packetize
+otherwise we never get enough packets into the pipe for things
+like fast retransmit and recovery to work.
+
+Be careful also to handle the case where MSS > window, otherwise
+we'll never send until the probe timer.
+
+Reported-by: ツ Leandro Melo de Sales <leandroal@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/net/tcp.h | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -519,8 +519,22 @@ extern unsigned int tcp_current_mss(stru
+ /* Bound MSS / TSO packet size with the half of the window */
+ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
+ {
+- if (tp->max_window && pktsize > (tp->max_window >> 1))
+- return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
++ int cutoff;
++
++ /* When peer uses tiny windows, there is no use in packetizing
++ * to sub-MSS pieces for the sake of SWS or making sure there
++ * are enough packets in the pipe for fast recovery.
++ *
++ * On the other hand, for extremely large MSS devices, handling
++ * smaller than MSS windows in this way does make sense.
++ */
++ if (tp->max_window >= 512)
++ cutoff = (tp->max_window >> 1);
++ else
++ cutoff = tp->max_window;
++
++ if (cutoff && pktsize > cutoff)
++ return max_t(int, cutoff, 68U - tp->tcp_header_len);
+ else
+ return pktsize;
+ }
--- /dev/null
+From e8387b3f5e5fc8ee54c8518d0e45cf3a338e81d6 Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Date: Tue, 24 Aug 2010 16:05:48 +0000
+Subject: tcp: select(writefds) don't hang up when a peer close connection
+
+
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+
+[ Upstream commit d84ba638e4ba3c40023ff997aa5e8d3ed002af36 ]
+
+This issue comes from the Ruby language community. The test program
+below hangs only when run on Linux.
+
+ % uname -mrsv
+ Linux 2.6.26-2-486 #1 Sat Dec 26 08:37:39 UTC 2009 i686
+ % ruby -rsocket -ve '
+ BasicSocket.do_not_reverse_lookup = true
+ serv = TCPServer.open("127.0.0.1", 0)
+ s1 = TCPSocket.open("127.0.0.1", serv.addr[1])
+ s2 = serv.accept
+ s2.close
+ s1.write("a") rescue p $!
+ s1.write("a") rescue p $!
+ Thread.new {
+ s1.write("a")
+ }.join'
+ ruby 1.9.3dev (2010-07-06 trunk 28554) [i686-linux]
+ #<Errno::EPIPE: Broken pipe>
+ [Hang Here]
+
+FreeBSD, Solaris and Mac don't hang. This is because Ruby's write() method
+calls select() internally, and tcp_poll() has a bug.
+
+SUS defines 'ready for writing' for select() as follows.
+
+| A descriptor shall be considered ready for writing when a call to an output
+| function with O_NONBLOCK clear would not block, whether or not the function
+| would transfer data successfully.
+
+That is, the EPIPE situation is clearly a case of 'ready for writing'.
+
+We don't have a read-side issue because tcp_poll() already handles
+read-side shutdown.
+
+| if (sk->sk_shutdown & RCV_SHUTDOWN)
+| mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+
+So, let's insert the same logic on the write side.
+
+- reference url
+ http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31065
+ http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31068
+
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/ipv4/tcp.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -453,7 +453,8 @@ unsigned int tcp_poll(struct file *file,
+ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+ mask |= POLLOUT | POLLWRNORM;
+ }
+- }
++ } else
++ mask |= POLLOUT | POLLWRNORM;
+
+ if (tp->urg_data & TCP_URG_VALID)
+ mask |= POLLPRI;
--- /dev/null
+From a499b2db4f2c7142a885e7090d2183795ecdf7d0 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Wed, 8 Sep 2010 05:08:44 +0000
+Subject: udp: add rehash on connect()
+
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+commit 719f835853a92f6090258114a72ffe41f09155cd upstream
+
+commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation)
+added a secondary hash on UDP, hashed on (local addr, local port).
+
+The problem is that the following sequence:
+
+fd = socket(...)
+connect(fd, &remote, ...)
+
+not only selects the remote end point (address and port), but also sets
+the local address, whereas the UDP stack stored the socket in the secondary
+hash table while its local address was INADDR_ANY (or the IPv6 equivalent).
+
+Sequence is :
+ - autobind() : choose a random local port, insert socket in hash tables
+ [while local address is INADDR_ANY]
+ - connect() : set remote address and port, change local address to IP
+ given by a route lookup.
+
+When an incoming UDP frame arrives, if more than 10 sockets are found in
+the primary hash table, we switch to the secondary table, and fail to find
+the socket because its local address changed.
+
+One solution to this problem is to rehash datagram socket if needed.
+
+We add a new rehash(struct sock *) method in "struct proto", and
+implement this method for UDP v4 & v6, using a common helper.
+
+This rehashing only takes care of secondary hash table, since primary
+hash (based on local port only) is not changed.
+
+Reported-by: Krzysztof Piotr Oledzki <ole@ans.pl>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Tested-by: Krzysztof Piotr Oledzki <ole@ans.pl>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/net/sock.h | 1 +
+ include/net/udp.h | 1 +
+ net/ipv4/datagram.c | 5 ++++-
+ net/ipv4/udp.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
+ net/ipv6/datagram.c | 7 ++++++-
+ net/ipv6/udp.c | 10 ++++++++++
+ 6 files changed, 66 insertions(+), 2 deletions(-)
+
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -749,6 +749,7 @@ struct proto {
+ /* Keeping track of sk's, looking them up, and port selection methods. */
+ void (*hash)(struct sock *sk);
+ void (*unhash)(struct sock *sk);
++ void (*rehash)(struct sock *sk);
+ int (*get_port)(struct sock *sk, unsigned short snum);
+
+ /* Keeping track of sockets in use */
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -151,6 +151,7 @@ static inline void udp_lib_hash(struct s
+ }
+
+ extern void udp_lib_unhash(struct sock *sk);
++extern void udp_lib_rehash(struct sock *sk, u16 new_hash);
+
+ static inline void udp_lib_close(struct sock *sk, long timeout)
+ {
+--- a/net/ipv4/datagram.c
++++ b/net/ipv4/datagram.c
+@@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk
+ }
+ if (!inet->inet_saddr)
+ inet->inet_saddr = rt->rt_src; /* Update source address */
+- if (!inet->inet_rcv_saddr)
++ if (!inet->inet_rcv_saddr) {
+ inet->inet_rcv_saddr = rt->rt_src;
++ if (sk->sk_prot->rehash)
++ sk->sk_prot->rehash(sk);
++ }
+ inet->inet_daddr = rt->rt_dst;
+ inet->inet_dport = usin->sin_port;
+ sk->sk_state = TCP_ESTABLISHED;
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk)
+ }
+ EXPORT_SYMBOL(udp_lib_unhash);
+
++/*
++ * inet_rcv_saddr was changed, we must rehash secondary hash
++ */
++void udp_lib_rehash(struct sock *sk, u16 newhash)
++{
++ if (sk_hashed(sk)) {
++ struct udp_table *udptable = sk->sk_prot->h.udp_table;
++ struct udp_hslot *hslot, *hslot2, *nhslot2;
++
++ hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
++ nhslot2 = udp_hashslot2(udptable, newhash);
++ udp_sk(sk)->udp_portaddr_hash = newhash;
++ if (hslot2 != nhslot2) {
++ hslot = udp_hashslot(udptable, sock_net(sk),
++ udp_sk(sk)->udp_port_hash);
++ /* we must lock primary chain too */
++ spin_lock_bh(&hslot->lock);
++
++ spin_lock(&hslot2->lock);
++ hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
++ hslot2->count--;
++ spin_unlock(&hslot2->lock);
++
++ spin_lock(&nhslot2->lock);
++ hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
++ &nhslot2->head);
++ nhslot2->count++;
++ spin_unlock(&nhslot2->lock);
++
++ spin_unlock_bh(&hslot->lock);
++ }
++ }
++}
++EXPORT_SYMBOL(udp_lib_rehash);
++
++static void udp_v4_rehash(struct sock *sk)
++{
++ u16 new_hash = udp4_portaddr_hash(sock_net(sk),
++ inet_sk(sk)->inet_rcv_saddr,
++ inet_sk(sk)->inet_num);
++ udp_lib_rehash(sk, new_hash);
++}
++
+ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ {
+ int rc;
+@@ -1843,6 +1886,7 @@ struct proto udp_prot = {
+ .backlog_rcv = __udp_queue_rcv_skb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
++ .rehash = udp_v4_rehash,
+ .get_port = udp_v4_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -104,9 +104,12 @@ ipv4_connected:
+ if (ipv6_addr_any(&np->saddr))
+ ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
+
+- if (ipv6_addr_any(&np->rcv_saddr))
++ if (ipv6_addr_any(&np->rcv_saddr)) {
+ ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+ &np->rcv_saddr);
++ if (sk->sk_prot->rehash)
++ sk->sk_prot->rehash(sk);
++ }
+
+ goto out;
+ }
+@@ -191,6 +194,8 @@ ipv4_connected:
+ if (ipv6_addr_any(&np->rcv_saddr)) {
+ ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
+ inet->inet_rcv_saddr = LOOPBACK4_IPV6;
++ if (sk->sk_prot->rehash)
++ sk->sk_prot->rehash(sk);
+ }
+
+ ip6_dst_store(sk, dst,
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, uns
+ return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
+ }
+
++static void udp_v6_rehash(struct sock *sk)
++{
++ u16 new_hash = udp6_portaddr_hash(sock_net(sk),
++ &inet6_sk(sk)->rcv_saddr,
++ inet_sk(sk)->inet_num);
++
++ udp_lib_rehash(sk, new_hash);
++}
++
+ static inline int compute_score(struct sock *sk, struct net *net,
+ unsigned short hnum,
+ struct in6_addr *saddr, __be16 sport,
+@@ -1452,6 +1461,7 @@ struct proto udpv6_prot = {
+ .backlog_rcv = udpv6_queue_rcv_skb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
++ .rehash = udp_v6_rehash,
+ .get_port = udp_v6_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
--- /dev/null
+From f08e075573ad91db08c6fbfd0d760a6adb713f00 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Sat, 4 Sep 2010 01:34:28 +0000
+Subject: UNIX: Do not loop forever at unix_autobind().
+
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit a9117426d0fcc05a194f728159a2d43df43c7add ]
+
+We assumed that unix_autobind() never fails if kzalloc() succeeded.
+But unix_autobind() allows only 1048576 names. If /proc/sys/fs/file-max is
+larger than 1048576 (e.g. systems with more than 10GB of RAM), a local user can
+consume all names using fork()/socket()/bind().
+
+If all names are in use, those who call bind() with addr_len == sizeof(short)
+or connect()/sendmsg() with setsockopt(SO_PASSCRED) will continue
+
+ while (1)
+ yield();
+
+loop at unix_autobind() till a name becomes available.
+This patch adds a loop counter in order to give up after 1048576 attempts.
+
+Calling yield() once per 256 attempts may not be sufficient when many names
+are already in use, for __unix_find_socket_byname() can take a long time under
+such circumstances. Therefore, this patch also adds a cond_resched() call.
+
+Note that currently a local user can consume 2GB of kernel memory if the user
+is allowed to create and autobind 1048576 UNIX domain sockets. We should
+consider adding some restriction for autobind operation.
+
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ net/unix/af_unix.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -673,6 +673,7 @@ static int unix_autobind(struct socket *
+ static u32 ordernum = 1;
+ struct unix_address *addr;
+ int err;
++ unsigned int retries = 0;
+
+ mutex_lock(&u->readlock);
+
+@@ -698,9 +699,17 @@ retry:
+ if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
+ addr->hash)) {
+ spin_unlock(&unix_table_lock);
+- /* Sanity yield. It is unusual case, but yet... */
+- if (!(ordernum&0xFF))
+- yield();
++ /*
++ * __unix_find_socket_byname() may take long time if many names
++ * are already in use.
++ */
++ cond_resched();
++ /* Give up if all names seems to be in use. */
++ if (retries++ == 0xFFFFF) {
++ err = -ENOSPC;
++ kfree(addr);
++ goto out;
++ }
+ goto retry;
+ }
+ addr->hash ^= sk->sk_type;
--- /dev/null
+From 024cfa5943a7e89565c60b612d698c2bfb3da66a Mon Sep 17 00:00:00 2001
+From: Mathias Nyman <mathias.nyman@nokia.com>
+Date: Mon, 6 Sep 2010 13:52:01 +0300
+Subject: usb: musb_debugfs: don't use the struct file private_data field with seq_files
+
+From: Mathias Nyman <mathias.nyman@nokia.com>
+
+commit 024cfa5943a7e89565c60b612d698c2bfb3da66a upstream.
+
+seq_files use the private_data field of a file struct for storing a seq_file structure;
+data should be stored in seq_file's own private field (e.g. file->private_data->private).
+Otherwise seq_release() will free the private data when the file is closed.
+
+Signed-off-by: Mathias Nyman <mathias.nyman@nokia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/usb/musb/musb_debugfs.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/usb/musb/musb_debugfs.c
++++ b/drivers/usb/musb/musb_debugfs.c
+@@ -195,15 +195,14 @@ static const struct file_operations musb
+
+ static int musb_test_mode_open(struct inode *inode, struct file *file)
+ {
+- file->private_data = inode->i_private;
+-
+ return single_open(file, musb_test_mode_show, inode->i_private);
+ }
+
+ static ssize_t musb_test_mode_write(struct file *file,
+ const char __user *ubuf, size_t count, loff_t *ppos)
+ {
+- struct musb *musb = file->private_data;
++ struct seq_file *s = file->private_data;
++ struct musb *musb = s->private;
+ u8 test = 0;
+ char buf[18];
+
--- /dev/null
+From a0846f1868b11cd827bdfeaf4527d8b1b1c0b098 Mon Sep 17 00:00:00 2001
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+Date: Wed, 15 Sep 2010 17:44:16 -0400
+Subject: USB: serial/mos*: prevent reading uninitialized stack memory
+
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+
+commit a0846f1868b11cd827bdfeaf4527d8b1b1c0b098 upstream.
+
+The TIOCGICOUNT device ioctl in both mos7720.c and mos7840.c allows
+unprivileged users to read uninitialized stack memory, because the
+"reserved" member of the serial_icounter_struct struct declared on the
+stack is not altered or zeroed before being copied back to the user.
+This patch takes care of it.
+
+Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/usb/serial/mos7720.c | 3 +++
+ drivers/usb/serial/mos7840.c | 3 +++
+ 2 files changed, 6 insertions(+)
+
+--- a/drivers/usb/serial/mos7720.c
++++ b/drivers/usb/serial/mos7720.c
+@@ -2024,6 +2024,9 @@ static int mos7720_ioctl(struct tty_stru
+
+ case TIOCGICOUNT:
+ cnow = mos7720_port->icount;
++
++ memset(&icount, 0, sizeof(struct serial_icounter_struct));
++
+ icount.cts = cnow.cts;
+ icount.dsr = cnow.dsr;
+ icount.rng = cnow.rng;
+--- a/drivers/usb/serial/mos7840.c
++++ b/drivers/usb/serial/mos7840.c
+@@ -2285,6 +2285,9 @@ static int mos7840_ioctl(struct tty_stru
+ case TIOCGICOUNT:
+ cnow = mos7840_port->icount;
+ smp_rmb();
++
++ memset(&icount, 0, sizeof(struct serial_icounter_struct));
++
+ icount.cts = cnow.cts;
+ icount.dsr = cnow.dsr;
+ icount.rng = cnow.rng;