From: Chris Wright <chrisw@sous-sol.org>
Date: Tue, 31 Oct 2006 11:20:53 +0000 (-0800)
Subject: more patches to queue
X-Git-Tag: v2.6.18.2~10
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f4f57af4fa74f1ce07f7863742e967920c13c8a4;p=thirdparty%2Fkernel%2Fstable-queue.git

more patches to queue
---

diff --git a/queue-2.6.18/check-bio-address-after-mapping-through-partitions.patch b/queue-2.6.18/check-bio-address-after-mapping-through-partitions.patch
new file mode 100644
index 00000000000..4e5b8e0bc2b
--- /dev/null
+++ b/queue-2.6.18/check-bio-address-after-mapping-through-partitions.patch
@@ -0,0 +1,74 @@
+From stable-bounces@linux.kernel.org  Mon Oct 30 17:54:57 2006
+Date: Tue, 31 Oct 2006 12:51:45 +1100
+From: NeilBrown <neilb@suse.de>
+To: Andrew Morton <akpm@osdl.org>
+Cc: stable@kernel.org, Jens Axboe <jens.axboe@oracle.com>
+Subject: [stable] [PATCH] Check bio address after mapping through partitions.
+
+Partitions are not limited to live within a device.  So
+we should range check after partition mapping.
+
+Note that 'maxsector' was being used for two different things.  I have
+split off the second usage into 'old_sector' so that maxsector can
+still be used for its primary usage later in the function.
+
+Cc: Jens Axboe <jens.axboe@oracle.com>
+Signed-off-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ block/ll_rw_blk.c |   24 ++++++++++++++++++++----
+ 1 file changed, 20 insertions(+), 4 deletions(-)
+
+--- linux-2.6.18.1.orig/block/ll_rw_blk.c
++++ linux-2.6.18.1/block/ll_rw_blk.c
+@@ -3021,6 +3021,7 @@ void generic_make_request(struct bio *bi
+ {
+ 	request_queue_t *q;
+ 	sector_t maxsector;
++	sector_t old_sector;
+ 	int ret, nr_sectors = bio_sectors(bio);
+ 	dev_t old_dev;
+ 
+@@ -3049,7 +3050,7 @@ void generic_make_request(struct bio *bi
+ 	 * NOTE: we don't repeat the blk_size check for each new device.
+ 	 * Stacking drivers are expected to know what they are doing.
+ 	 */
+-	maxsector = -1;
++	old_sector = -1;
+ 	old_dev = 0;
+ 	do {
+ 		char b[BDEVNAME_SIZE];
+@@ -3083,15 +3084,30 @@ end_io:
+ 		 */
+ 		blk_partition_remap(bio);
+ 
+-		if (maxsector != -1)
++		if (old_sector != -1)
+ 			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 
+-					    maxsector);
++					    old_sector);
+ 
+ 		blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+ 
+-		maxsector = bio->bi_sector;
++		old_sector = bio->bi_sector;
+ 		old_dev = bio->bi_bdev->bd_dev;
+ 
++		maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
++		if (maxsector) {
++			sector_t sector = bio->bi_sector;
++
++			if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
++				/*
++				 * This may well happen - partitions are not checked
++				 * to make sure they are within the size of the
++				 * whole device.
++				 */
++				handle_bad_sector(bio);
++				goto end_io;
++			}
++		}
++
+ 		ret = q->make_request_fn(q, bio);
+ 	} while (ret);
+ }
diff --git a/queue-2.6.18/fill_tgid-fix-task_struct-leak-and-possible-oops.patch b/queue-2.6.18/fill_tgid-fix-task_struct-leak-and-possible-oops.patch
new file mode 100644
index 00000000000..6c50711ea34
--- /dev/null
+++ b/queue-2.6.18/fill_tgid-fix-task_struct-leak-and-possible-oops.patch
@@ -0,0 +1,58 @@
+From fca178c0c6e8d52a1875be36b070f30884ebfae9 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@tv-sign.ru>
+Date: Sat, 28 Oct 2006 10:38:49 -0700
+Subject: fill_tgid: fix task_struct leak and possible oops
+
+1. fill_tgid() forgets to do put_task_struct(first).
+
+2. release_task(first) can happen after fill_tgid() drops tasklist_lock,
+   it is unsafe to dereference first->signal.
+
+This is a temporary fix, imho the locking should be reworked.
+
+Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
+Cc: Shailabh Nagar <nagar@watson.ibm.com>
+Cc: Balbir Singh <balbir@in.ibm.com>
+Cc: Jay Lan <jlan@sgi.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ kernel/taskstats.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- linux-2.6.18.1.orig/kernel/taskstats.c
++++ linux-2.6.18.1/kernel/taskstats.c
+@@ -229,14 +229,17 @@ static int fill_tgid(pid_t tgid, struct 
+ 	} else
+ 		get_task_struct(first);
+ 
+-	/* Start with stats from dead tasks */
+-	spin_lock_irqsave(&first->signal->stats_lock, flags);
+-	if (first->signal->stats)
+-		memcpy(stats, first->signal->stats, sizeof(*stats));
+-	spin_unlock_irqrestore(&first->signal->stats_lock, flags);
+ 
+ 	tsk = first;
+ 	read_lock(&tasklist_lock);
++	/* Start with stats from dead tasks */
++	if (first->signal) {
++		spin_lock_irqsave(&first->signal->stats_lock, flags);
++		if (first->signal->stats)
++			memcpy(stats, first->signal->stats, sizeof(*stats));
++		spin_unlock_irqrestore(&first->signal->stats_lock, flags);
++	}
++
+ 	do {
+ 		if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk))
+ 			continue;
+@@ -256,7 +259,7 @@ static int fill_tgid(pid_t tgid, struct 
+ 	 * Accounting subsytems can also add calls here to modify
+ 	 * fields of taskstats.
+ 	 */
+-
++	put_task_struct(first);
+ 	return 0;
+ }
+ 
diff --git a/queue-2.6.18/ipv6-fix-lockup-via-proc-net-ip6_flowlabel.patch b/queue-2.6.18/ipv6-fix-lockup-via-proc-net-ip6_flowlabel.patch
new file mode 100644
index 00000000000..3517201aa08
--- /dev/null
+++ b/queue-2.6.18/ipv6-fix-lockup-via-proc-net-ip6_flowlabel.patch
@@ -0,0 +1,38 @@
+From stable-bounces@linux.kernel.org  Mon Oct 30 15:14:51 2006
+Date: Mon, 30 Oct 2006 15:11:21 -0800 (PST)
+From: David Miller <davem@davemloft.net>
+To: stable@kernel.org
+Cc: bunk@stusta.de
+Subject: IPV6: fix lockup via /proc/net/ip6_flowlabel
+
+From: James Morris <jmorris@namei.org>
+
+There's a bug in the seqfile handling for /proc/net/ip6_flowlabel, where, 
+after finding a flowlabel, the code will loop forever not finding any 
+further flowlabels, first traversing the rest of the hash bucket then just 
+looping.
+
+This patch fixes the problem by breaking after the hash bucket has been 
+traversed.
+
+Note that this bug can cause lockups and oopses, and is trivially invoked 
+by an unprivileged user.
+
+Signed-off-by: James Morris <jmorris@namei.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv6/ip6_flowlabel.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- linux-2.6.18.1.orig/net/ipv6/ip6_flowlabel.c
++++ linux-2.6.18.1/net/ipv6/ip6_flowlabel.c
+@@ -587,6 +587,8 @@ static struct ip6_flowlabel *ip6fl_get_n
+ 	while (!fl) {
+ 		if (++state->bucket <= FL_HASH_MASK)
+ 			fl = fl_ht[state->bucket];
++		else
++			break;
+ 	}
+ 	return fl;
+ }
diff --git a/queue-2.6.18/isdn-fix-drivers-by-handling-errors-thrown-by-readstat.patch b/queue-2.6.18/isdn-fix-drivers-by-handling-errors-thrown-by-readstat.patch
new file mode 100644
index 00000000000..8c0766cb09d
--- /dev/null
+++ b/queue-2.6.18/isdn-fix-drivers-by-handling-errors-thrown-by-readstat.patch
@@ -0,0 +1,49 @@
+From 04518bfe8eac2e82b476fb2b0093527adc2bc791 Mon Sep 17 00:00:00 2001
+From: Jeff Garzik <jeff@garzik.org>
+Date: Tue, 17 Oct 2006 00:10:39 -0700
+Subject: ISDN: fix drivers, by handling errors thrown by ->readstat()
+
+This is a particularly ugly on-failure bug, possibly security, since the
+lack of error handling here is covering up another class of bug: failure to
+handle copy_to_user() return values.
+
+The I4L API function ->readstat() returns an integer, and by looking at
+several existing driver implementations, it is clear that a negative return
+value was meant to indicate an error.
+
+Given that several drivers already return a negative value indicating an
+errno-style error, the current code would blindly accept that [negative]
+value as a valid amount of bytes read.  Obvious damage ensues.
+
+Correcting ->readstat() handling to properly notice errors fixes the
+existing code to work correctly on error, and enables future patches to
+more easily indicate errors during operation.
+
+Signed-off-by: Jeff Garzik <jeff@garzik.org>
+Cc: Karsten Keil <kkeil@suse.de>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ drivers/isdn/i4l/isdn_common.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- linux-2.6.18.1.orig/drivers/isdn/i4l/isdn_common.c
++++ linux-2.6.18.1/drivers/isdn/i4l/isdn_common.c
+@@ -1134,9 +1134,12 @@ isdn_read(struct file *file, char __user
+ 		if (dev->drv[drvidx]->interface->readstat) {
+ 			if (count > dev->drv[drvidx]->stavail)
+ 				count = dev->drv[drvidx]->stavail;
+-			len = dev->drv[drvidx]->interface->
+-				readstat(buf, count, drvidx,
+-					 isdn_minor2chan(minor));
++			len = dev->drv[drvidx]->interface->readstat(buf, count,
++						drvidx, isdn_minor2chan(minor));
++			if (len < 0) {
++				retval = len;
++				goto out;
++			}
+ 		} else {
+ 			len = 0;
+ 		}
diff --git a/queue-2.6.18/jmb-368-pata-detection.patch b/queue-2.6.18/jmb-368-pata-detection.patch
new file mode 100644
index 00000000000..cbf463b53e9
--- /dev/null
+++ b/queue-2.6.18/jmb-368-pata-detection.patch
@@ -0,0 +1,32 @@
+From c333526f489044be2b93085720eb898f0037b346 Mon Sep 17 00:00:00 2001
+From: Alan Cox <alan@lxorguk.ukuu.org.uk>
+Date: Sat, 28 Oct 2006 10:38:57 -0700
+Subject: JMB 368 PATA detection
+
+The Jmicron JMB368 is PATA only, so it has the PATA on function zero.
+Therefore, don't skip function zero on this device when probing.
+
+Signed-off-by: Alan Cox <alan@redhat.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ drivers/ide/pci/generic.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- linux-2.6.18.1.orig/drivers/ide/pci/generic.c
++++ linux-2.6.18.1/drivers/ide/pci/generic.c
+@@ -242,8 +242,10 @@ static int __devinit generic_init_one(st
+ 	    (!(PCI_FUNC(dev->devfn) & 1)))
+ 		goto out;
+ 
+-	if (dev->vendor == PCI_VENDOR_ID_JMICRON && PCI_FUNC(dev->devfn) != 1)
+-		goto out;
++	if (dev->vendor == PCI_VENDOR_ID_JMICRON) {
++		if (dev->device != PCI_DEVICE_ID_JMICRON_JMB368 && PCI_FUNC(dev->devfn) != 1)
++			goto out;
++	}
+ 
+ 	if (dev->vendor != PCI_VENDOR_ID_JMICRON) {
+ 		pci_read_config_word(dev, PCI_COMMAND, &command);
diff --git a/queue-2.6.18/nfs-nfs_lookup-don-t-hash-dentry-when-optimising-away-the-lookup.patch b/queue-2.6.18/nfs-nfs_lookup-don-t-hash-dentry-when-optimising-away-the-lookup.patch
new file mode 100644
index 00000000000..389ae7cb394
--- /dev/null
+++ b/queue-2.6.18/nfs-nfs_lookup-don-t-hash-dentry-when-optimising-away-the-lookup.patch
@@ -0,0 +1,53 @@
+From fd6840714d9cf6e93f1d42b904860a94df316b85 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 5 Sep 2006 12:27:44 -0400
+Subject: NFS: nfs_lookup - don't hash dentry when optimising away the lookup
+
+If the open intents tell us that a given lookup is going to result in an
+exclusive create, we currently optimize away the lookup call itself. The
+reason is that the lookup would not be atomic with the create RPC call, so
+why do it in the first place?
+
+A problem occurs, however, if the VFS aborts the exclusive create operation
+after the lookup, but before the call to create the file/directory: in this
+case we will end up with a hashed negative dentry in the dcache that has
+never been looked up.
+Fix this by only actually hashing the dentry once the create operation has
+been successfully completed.
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ fs/nfs/dir.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- linux-2.6.18.1.orig/fs/nfs/dir.c
++++ linux-2.6.18.1/fs/nfs/dir.c
+@@ -902,9 +902,15 @@ static struct dentry *nfs_lookup(struct 
+ 
+ 	lock_kernel();
+ 
+-	/* If we're doing an exclusive create, optimize away the lookup */
+-	if (nfs_is_exclusive_create(dir, nd))
+-		goto no_entry;
++	/*
++	 * If we're doing an exclusive create, optimize away the lookup
++	 * but don't hash the dentry.
++	 */
++	if (nfs_is_exclusive_create(dir, nd)) {
++		d_instantiate(dentry, NULL);
++		res = NULL;
++		goto out_unlock;
++	}
+ 
+ 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+ 	if (error == -ENOENT)
+@@ -1156,6 +1162,8 @@ int nfs_instantiate(struct dentry *dentr
+ 	if (IS_ERR(inode))
+ 		goto out_err;
+ 	d_instantiate(dentry, inode);
++	if (d_unhashed(dentry))
++		d_rehash(dentry);
+ 	return 0;
+ out_err:
+ 	d_drop(dentry);
diff --git a/queue-2.6.18/pci-remove-quirk_via_abnormal_poweroff.patch b/queue-2.6.18/pci-remove-quirk_via_abnormal_poweroff.patch
new file mode 100644
index 00000000000..d2a645942b9
--- /dev/null
+++ b/queue-2.6.18/pci-remove-quirk_via_abnormal_poweroff.patch
@@ -0,0 +1,73 @@
+From 3560cc5ec3488b20d927f7160a21a0df1d1fda20 Mon Sep 17 00:00:00 2001
+From: Karsten Wiese <annabellesgarden@yahoo.de>
+Date: Fri, 20 Oct 2006 14:45:36 -0700
+Subject: PCI: Remove quirk_via_abnormal_poweroff
+
+My K8T800 mobo resumes fine from suspend to ram with and without patch
+applied against 2.6.18.
+
+quirk_via_abnormal_poweroff makes some boards not boot 2.6.18, so IMO patch
+should go to head, 2.6.18.2 and everywhere "ACPI: ACPICA 20060623" has been
+applied.
+
+
+Remove quirk_via_abnormal_poweroff
+
+Obsoleted by "ACPI: ACPICA 20060623":
+<snip>
+    Implemented support for "ignored" bits in the ACPI
+    registers.  According to the ACPI specification, these
+    bits should be preserved when writing the registers via
+    a read/modify/write cycle. There are 3 bits preserved
+    in this manner: PM1_CONTROL[0] (SCI_EN), PM1_CONTROL[9],
+    and PM1_STATUS[11].
+    http://bugzilla.kernel.org/show_bug.cgi?id=3691
+</snip>
+
+Signed-off-by: Karsten Wiese <fzu@wemgehoertderstaat.de>
+Cc: Bob Moore <robert.moore@intel.com>
+Cc: "Brown, Len" <len.brown@intel.com>
+Acked-by: Dave Jones <davej@redhat.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ drivers/pci/quirks.c |   27 ---------------------------
+ 1 file changed, 27 deletions(-)
+
+--- linux-2.6.18.1.orig/drivers/pci/quirks.c
++++ linux-2.6.18.1/drivers/pci/quirks.c
+@@ -685,33 +685,6 @@ static void __devinit quirk_vt82c598_id(
+ }
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C597_0,	quirk_vt82c598_id );
+ 
+-#ifdef CONFIG_ACPI_SLEEP
+-
+-/*
+- * Some VIA systems boot with the abnormal status flag set. This can cause
+- * the BIOS to re-POST the system on resume rather than passing control
+- * back to the OS.  Clear the flag on boot
+- */
+-static void __devinit quirk_via_abnormal_poweroff(struct pci_dev *dev)
+-{
+-	u32 reg;
+-
+-	acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK, ACPI_REGISTER_PM1_STATUS,
+-				&reg);
+-
+-	if (reg & 0x800) {
+-		printk("Clearing abnormal poweroff flag\n");
+-		acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
+-					ACPI_REGISTER_PM1_STATUS,
+-					(u16)0x800);
+-	}
+-}
+-
+-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, quirk_via_abnormal_poweroff);
+-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_abnormal_poweroff);
+-
+-#endif
+-
+ /*
+  * CardBus controllers have a legacy base address that enables them
+  * to respond as i82365 pcmcia controllers.  We don't want them to
diff --git a/queue-2.6.18/posix-cpu-timers-prevent-signal-delivery-starvation.patch b/queue-2.6.18/posix-cpu-timers-prevent-signal-delivery-starvation.patch
new file mode 100644
index 00000000000..b4b19d3cfa5
--- /dev/null
+++ b/queue-2.6.18/posix-cpu-timers-prevent-signal-delivery-starvation.patch
@@ -0,0 +1,137 @@
+From stable-bounces@linux.kernel.org  Tue Oct 17 00:12:55 2006
+Date: Tue, 17 Oct 2006 00:09:39 -0700
+From: akpm@osdl.org
+To: torvalds@osdl.org
+Cc: akpm@osdl.org, dwalker@mvista.com, pmattis@google.com, johnstul@us.ibm.com, toyoa@mvista.com, stable@kernel.org, zippel@linux-m68k.org, mbligh@google.com, spark@google.com, rohitseth@google.com, tglx@linutronix.de, mingo@elte.hu, roland@redhat.com
+Subject: posix-cpu-timers: prevent signal delivery starvation
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+The integer divisions in the timer accounting code can round the result
+down to 0.  Adding 0 is without effect and the signal delivery stops.
+
+Clamp the division result to minimum 1 to avoid this.
+
+Problem was reported by Seongbae Park <spark@google.com>, who also
+provided an initial patch.
+
+Roland sayeth:
+
+  I have had some more time to think about the problem, and to reproduce it
+  using Toyo's test case.  For the record, if my understanding of the problem
+  is correct, this happens only in one very particular case.  First, the
+  expiry time has to be so soon that in cputime_t units (usually 1s/HZ ticks)
+  it's < nthreads so the division yields zero.  Second, it only affects each
+  thread that is so new that its CPU time accumulation is zero so now+0 is
+  still zero and ->it_*_expires winds up staying zero.  For the VIRT and PROF
+  clocks when cputime_t is tick granularity (or the SCHED clock on
+  configurations where sched_clock's value only advances on clock ticks), this
+  is not hard to arrange with new threads starting up and blocking before they
+  accumulate a whole tick of CPU time.  That's what happens in Toyo's test
+  case.
+
+  Note that in general it is fine for that division to round down to zero,
+  and set each thread's expiry time to its "now" time.  The problem only
+  arises with threads whose "now" value is still zero, so that now+0 winds up
+  0 and is interpreted as "not set" instead of ">= now".  So it would be a
+  sufficient and more precise fix to just use max(ticks, 1) inside the loop
+  when setting each it_*_expires value.
+
+  But, it does no harm to round the division up to one and always advance
+  every thread's expiry time.  If the thread didn't already fire timers for
+  the expiry time of "now", there is no expectation that it will do so before
+  the next tick anyway.  So I followed Thomas's patch in lifting the max out
+  of the loops.
+
+  This patch also covers the reload cases, which are harder to write a test
+  for (and I didn't try).  I've tested it with Toyo's case and it fixes that.
+
+
+[toyoa@mvista.com: fix: min_t -> max_t]
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Roland McGrath <roland@redhat.com>
+Cc: Daniel Walker <dwalker@mvista.com>
+Cc: Toyo Abe <toyoa@mvista.com>
+Cc: john stultz <johnstul@us.ibm.com>
+Cc: Roman Zippel <zippel@linux-m68k.org>
+Cc: Seongbae Park <spark@google.com>
+Cc: Peter Mattis <pmattis@google.com>
+Cc: Rohit Seth <rohitseth@google.com>
+Cc: Martin Bligh <mbligh@google.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+
+ kernel/posix-cpu-timers.c |   27 +++++++++++++++++++++------
+ 1 file changed, 21 insertions(+), 6 deletions(-)
+
+--- linux-2.6.18.1.orig/kernel/posix-cpu-timers.c
++++ linux-2.6.18.1/kernel/posix-cpu-timers.c
+@@ -88,6 +88,19 @@ static inline union cpu_time_count cpu_t
+ }
+ 
+ /*
++ * Divide and limit the result to res >= 1
++ *
++ * This is necessary to prevent signal delivery starvation, when the result of
++ * the division would be rounded down to 0.
++ */
++static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
++{
++	cputime_t res = cputime_div(time, div);
++
++	return max_t(cputime_t, res, 1);
++}
++
++/*
+  * Update expiry time from increment, and increase overrun count,
+  * given the current clock sample.
+  */
+@@ -483,8 +496,8 @@ static void process_timer_rebalance(stru
+ 		BUG();
+ 		break;
+ 	case CPUCLOCK_PROF:
+-		left = cputime_div(cputime_sub(expires.cpu, val.cpu),
+-				   nthreads);
++		left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
++				       nthreads);
+ 		do {
+ 			if (likely(!(t->flags & PF_EXITING))) {
+ 				ticks = cputime_add(prof_ticks(t), left);
+@@ -498,8 +511,8 @@ static void process_timer_rebalance(stru
+ 		} while (t != p);
+ 		break;
+ 	case CPUCLOCK_VIRT:
+-		left = cputime_div(cputime_sub(expires.cpu, val.cpu),
+-				   nthreads);
++		left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
++				       nthreads);
+ 		do {
+ 			if (likely(!(t->flags & PF_EXITING))) {
+ 				ticks = cputime_add(virt_ticks(t), left);
+@@ -515,6 +528,7 @@ static void process_timer_rebalance(stru
+ 	case CPUCLOCK_SCHED:
+ 		nsleft = expires.sched - val.sched;
+ 		do_div(nsleft, nthreads);
++		nsleft = max_t(unsigned long long, nsleft, 1);
+ 		do {
+ 			if (likely(!(t->flags & PF_EXITING))) {
+ 				ns = t->sched_time + nsleft;
+@@ -1159,12 +1173,13 @@ static void check_process_timers(struct 
+ 
+ 		prof_left = cputime_sub(prof_expires, utime);
+ 		prof_left = cputime_sub(prof_left, stime);
+-		prof_left = cputime_div(prof_left, nthreads);
++		prof_left = cputime_div_non_zero(prof_left, nthreads);
+ 		virt_left = cputime_sub(virt_expires, utime);
+-		virt_left = cputime_div(virt_left, nthreads);
++		virt_left = cputime_div_non_zero(virt_left, nthreads);
+ 		if (sched_expires) {
+ 			sched_left = sched_expires - sched_time;
+ 			do_div(sched_left, nthreads);
++			sched_left = max_t(unsigned long long, sched_left, 1);
+ 		} else {
+ 			sched_left = 0;
+ 		}
diff --git a/queue-2.6.18/reintroduce-nodes_span_other_nodes-for-powerpc.patch b/queue-2.6.18/reintroduce-nodes_span_other_nodes-for-powerpc.patch
new file mode 100644
index 00000000000..27f45a29d3c
--- /dev/null
+++ b/queue-2.6.18/reintroduce-nodes_span_other_nodes-for-powerpc.patch
@@ -0,0 +1,85 @@
+From 7516795739bd53175629b90fab0ad488d7a6a9f7 Mon Sep 17 00:00:00 2001
+From: Andy Whitcroft <apw@shadowen.org>
+Date: Sat, 21 Oct 2006 10:24:14 -0700
+Subject: Reintroduce NODES_SPAN_OTHER_NODES for powerpc
+
+Revert "[PATCH] Remove SPAN_OTHER_NODES config definition"
+    This reverts commit f62859bb6871c5e4a8e591c60befc8caaf54db8c.
+Revert "[PATCH] mm: remove arch independent NODES_SPAN_OTHER_NODES"
+    This reverts commit a94b3ab7eab4edcc9b2cb474b188f774c331adf7.
+
+Also update the comments to indicate that this is still required
+and where it's used.
+
+Signed-off-by: Andy Whitcroft <apw@shadowen.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Mike Kravetz <kravetz@us.ibm.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Acked-by: Mel Gorman <mel@csn.ul.ie>
+Acked-by: Will Schmidt <will_schmidt@vnet.ibm.com>
+Cc: Christoph Lameter <clameter@sgi.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ arch/powerpc/Kconfig                   |    9 +++++++++
+ arch/powerpc/configs/pseries_defconfig |    1 +
+ include/linux/mmzone.h                 |    6 ++++++
+ mm/page_alloc.c                        |    2 ++
+ 4 files changed, 18 insertions(+)
+
+--- linux-2.6.18.1.orig/arch/powerpc/Kconfig
++++ linux-2.6.18.1/arch/powerpc/Kconfig
+@@ -729,6 +729,15 @@ config ARCH_MEMORY_PROBE
+ 	def_bool y
+ 	depends on MEMORY_HOTPLUG
+ 
++# Some NUMA nodes have memory ranges that span
++# other nodes.  Even though a pfn is valid and
++# between a node's start and end pfns, it may not
++# reside on that node.  See memmap_init_zone()
++# for details.
++config NODES_SPAN_OTHER_NODES
++	def_bool y
++	depends on NEED_MULTIPLE_NODES
++
+ config PPC_64K_PAGES
+ 	bool "64k page size"
+ 	depends on PPC64
+--- linux-2.6.18.1.orig/arch/powerpc/configs/pseries_defconfig
++++ linux-2.6.18.1/arch/powerpc/configs/pseries_defconfig
+@@ -184,6 +184,7 @@ CONFIG_SPLIT_PTLOCK_CPUS=4
+ CONFIG_MIGRATION=y
+ CONFIG_RESOURCES_64BIT=y
+ CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
++CONFIG_NODES_SPAN_OTHER_NODES=y
+ # CONFIG_PPC_64K_PAGES is not set
+ CONFIG_SCHED_SMT=y
+ CONFIG_PROC_DEVICETREE=y
+--- linux-2.6.18.1.orig/include/linux/mmzone.h
++++ linux-2.6.18.1/include/linux/mmzone.h
+@@ -632,6 +632,12 @@ void sparse_init(void);
+ #define sparse_index_init(_sec, _nid)  do {} while (0)
+ #endif /* CONFIG_SPARSEMEM */
+ 
++#ifdef CONFIG_NODES_SPAN_OTHER_NODES
++#define early_pfn_in_nid(pfn, nid)	(early_pfn_to_nid(pfn) == (nid))
++#else
++#define early_pfn_in_nid(pfn, nid)	(1)
++#endif
++
+ #ifndef early_pfn_valid
+ #define early_pfn_valid(pfn)	(1)
+ #endif
+--- linux-2.6.18.1.orig/mm/page_alloc.c
++++ linux-2.6.18.1/mm/page_alloc.c
+@@ -1673,6 +1673,8 @@ void __meminit memmap_init_zone(unsigned
+ 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ 		if (!early_pfn_valid(pfn))
+ 			continue;
++		if (!early_pfn_in_nid(pfn, nid))
++			continue;
+ 		page = pfn_to_page(pfn);
+ 		set_page_links(page, zone, nid, pfn);
+ 		init_page_count(page);
diff --git a/queue-2.6.18/rtc-max6902-month-conversion-fix.patch b/queue-2.6.18/rtc-max6902-month-conversion-fix.patch
new file mode 100644
index 00000000000..8b3a0ff029c
--- /dev/null
+++ b/queue-2.6.18/rtc-max6902-month-conversion-fix.patch
@@ -0,0 +1,39 @@
+From stable-bounces@linux.kernel.org  Tue Oct 17 00:12:18 2006
+Date: Tue, 17 Oct 2006 00:09:53 -0700
+From: akpm@osdl.org
+To: torvalds@osdl.org
+Cc: akpm@osdl.org, a.zummo@towertech.it, flarramendi@gmail.com, raph@raphnet.net, stable@kernel.org
+Subject: rtc-max6902: month conversion fix
+
+From: Francisco Larramendi <flarramendi@gmail.com>
+
+Fix October-only BCD-to-binary conversion bug:
+
+	0x08 -> 7
+	0x09 -> 8
+	0x10 -> 15 (!)
+	0x11 -> 19
+
+Fixes http://bugzilla.kernel.org/show_bug.cgi?id=7361
+
+Cc: Raphael Assenat <raph@raphnet.net>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+
+ drivers/rtc/rtc-max6902.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- linux-2.6.18.1.orig/drivers/rtc/rtc-max6902.c
++++ linux-2.6.18.1/drivers/rtc/rtc-max6902.c
+@@ -137,7 +137,7 @@ static int max6902_get_datetime(struct d
+ 	dt->tm_min	= BCD2BIN(chip->buf[2]);
+ 	dt->tm_hour	= BCD2BIN(chip->buf[3]);
+ 	dt->tm_mday	= BCD2BIN(chip->buf[4]);
+-	dt->tm_mon	= BCD2BIN(chip->buf[5] - 1);
++	dt->tm_mon	= BCD2BIN(chip->buf[5]) - 1;
+ 	dt->tm_wday	= BCD2BIN(chip->buf[6]);
+ 	dt->tm_year = BCD2BIN(chip->buf[7]);
+ 
diff --git a/queue-2.6.18/series b/queue-2.6.18/series
index 85fb75e1645..b9d16f4fd11 100644
--- a/queue-2.6.18/series
+++ b/queue-2.6.18/series
@@ -44,3 +44,17 @@ uml-remove-warnings-added-by-previous-stable-patch.patch
 alsa-snd_rtctimer-handle-rtc-interrupts-with-a-tasklet.patch
 watchdog-sc1200wdt-fix-missing-pnp_unregister_driver.patch
 fix-intel-rng-detection.patch
+posix-cpu-timers-prevent-signal-delivery-starvation.patch
+rtc-max6902-month-conversion-fix.patch
+isdn-fix-drivers-by-handling-errors-thrown-by-readstat.patch
+sparc64-fix-pci-memory-space-root-resource-on-hummingbird.patch
+pci-remove-quirk_via_abnormal_poweroff.patch
+reintroduce-nodes_span_other_nodes-for-powerpc.patch
+nfs-nfs_lookup-don-t-hash-dentry-when-optimising-away-the-lookup.patch
+vmscan-fix-temp_priority-race.patch
+use-min-of-two-prio-settings-in-calculating-distress-for-reclaim.patch
+fill_tgid-fix-task_struct-leak-and-possible-oops.patch
+jmb-368-pata-detection.patch
+tcp-cubic-scaling-error.patch
+ipv6-fix-lockup-via-proc-net-ip6_flowlabel.patch
+check-bio-address-after-mapping-through-partitions.patch
diff --git a/queue-2.6.18/sparc64-fix-pci-memory-space-root-resource-on-hummingbird.patch b/queue-2.6.18/sparc64-fix-pci-memory-space-root-resource-on-hummingbird.patch
new file mode 100644
index 00000000000..8d35b58b74f
--- /dev/null
+++ b/queue-2.6.18/sparc64-fix-pci-memory-space-root-resource-on-hummingbird.patch
@@ -0,0 +1,140 @@
+From stable-bounces@linux.kernel.org  Wed Oct 18 13:40:37 2006
+Date: Wed, 18 Oct 2006 13:38:49 -0700 (PDT)
+From: David Miller <davem@davemloft.net>
+To: stable@kernel.org
+Subject: SPARC64: Fix PCI memory space root resource on Hummingbird.
+
+For Hummingbird PCI controllers, we should create the root
+PCI memory space resource as the full 4GB area, and then
+allocate the IOMMU DMA translation window out of there.
+
+The old code just assumed that the IOMMU DMA translation base
+to the top of the 4GB area was unusable.  This is not true on
+many systems such as SB100 and SB150, where the IOMMU DMA
+translation window sits at 0xc0000000->0xdfffffff.
+
+So what would happen is that any device mapped by the firmware
+at the top section 0xe0000000->0xffffffff would get remapped
+by Linux somewhere else leading to all kinds of problems and
+boot failures.
+
+While we're here, report more cases of OBP resource assignment
+conflicts.  The only truly valid ones are ROM resource conflicts.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ arch/sparc64/kernel/pci_common.c |   29 ++++++++++-------------------
+ arch/sparc64/kernel/pci_sabre.c  |   23 +++++++++++++++++++----
+ 2 files changed, 29 insertions(+), 23 deletions(-)
+
+--- linux-2.6.18.1.orig/arch/sparc64/kernel/pci_common.c
++++ linux-2.6.18.1/arch/sparc64/kernel/pci_common.c
+@@ -330,19 +330,6 @@ __init get_device_resource(struct linux_
+ 	return res;
+ }
+ 
+-static int __init pdev_resource_collisions_expected(struct pci_dev *pdev)
+-{
+-	if (pdev->vendor != PCI_VENDOR_ID_SUN)
+-		return 0;
+-
+-	if (pdev->device == PCI_DEVICE_ID_SUN_RIO_EBUS ||
+-	    pdev->device == PCI_DEVICE_ID_SUN_RIO_1394 ||
+-	    pdev->device == PCI_DEVICE_ID_SUN_RIO_USB)
+-		return 1;
+-
+-	return 0;
+-}
+-
+ static void __init pdev_record_assignments(struct pci_pbm_info *pbm,
+ 					   struct pci_dev *pdev)
+ {
+@@ -400,19 +387,23 @@ static void __init pdev_record_assignmen
+ 		pbm->parent->resource_adjust(pdev, res, root);
+ 
+ 		if (request_resource(root, res) < 0) {
++			int rnum;
++
+ 			/* OK, there is some conflict.  But this is fine
+ 			 * since we'll reassign it in the fixup pass.
+ 			 *
+-			 * We notify the user that OBP made an error if it
+-			 * is a case we don't expect.
++			 * Do not print the warning for ROM resources
++			 * as such a conflict is quite common and
++			 * harmless as the ROM bar is disabled.
+ 			 */
+-			if (!pdev_resource_collisions_expected(pdev)) {
+-				printk(KERN_ERR "PCI: Address space collision on region %ld "
++			rnum = (res - &pdev->resource[0]);
++			if (rnum != PCI_ROM_RESOURCE)
++				printk(KERN_ERR "PCI: Resource collision, "
++				       "region %d "
+ 				       "[%016lx:%016lx] of device %s\n",
+-				       (res - &pdev->resource[0]),
++				       rnum,
+ 				       res->start, res->end,
+ 				       pci_name(pdev));
+-			}
+ 		}
+ 	}
+ }
+--- linux-2.6.18.1.orig/arch/sparc64/kernel/pci_sabre.c
++++ linux-2.6.18.1/arch/sparc64/kernel/pci_sabre.c
+@@ -1196,7 +1196,7 @@ static void pbm_register_toplevel_resour
+ 					    &pbm->mem_space);
+ }
+ 
+-static void sabre_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 dma_begin)
++static void sabre_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 dma_start, u32 dma_end)
+ {
+ 	struct pci_pbm_info *pbm;
+ 	struct device_node *node;
+@@ -1261,6 +1261,8 @@ static void sabre_pbm_init(struct pci_co
+ 		node = node->sibling;
+ 	}
+ 	if (simbas_found == 0) {
++		struct resource *rp;
++
+ 		/* No APBs underneath, probably this is a hummingbird
+ 		 * system.
+ 		 */
+@@ -1302,8 +1304,10 @@ static void sabre_pbm_init(struct pci_co
+ 		pbm->io_space.end   = pbm->io_space.start + (1UL << 24) - 1UL;
+ 		pbm->io_space.flags = IORESOURCE_IO;
+ 
+-		pbm->mem_space.start = p->pbm_A.controller_regs + SABRE_MEMSPACE;
+-		pbm->mem_space.end   = pbm->mem_space.start + (unsigned long)dma_begin - 1UL;
++		pbm->mem_space.start =
++			(p->pbm_A.controller_regs + SABRE_MEMSPACE);
++		pbm->mem_space.end =
++			(pbm->mem_space.start + ((1UL << 32UL) - 1UL));
+ 		pbm->mem_space.flags = IORESOURCE_MEM;
+ 
+ 		if (request_resource(&ioport_resource, &pbm->io_space) < 0) {
+@@ -1315,6 +1319,17 @@ static void sabre_pbm_init(struct pci_co
+ 			prom_halt();
+ 		}
+ 
++		rp = kmalloc(sizeof(*rp), GFP_KERNEL);
++		if (!rp) {
++			prom_printf("Cannot allocate IOMMU resource.\n");
++			prom_halt();
++		}
++		rp->name = "IOMMU";
++		rp->start = pbm->mem_space.start + (unsigned long) dma_start;
++		rp->end = pbm->mem_space.start + (unsigned long) dma_end - 1UL;
++		rp->flags = IORESOURCE_BUSY;
++		request_resource(&pbm->mem_space, rp);
++
+ 		pci_register_legacy_regions(&pbm->io_space,
+ 					    &pbm->mem_space);
+ 	}
+@@ -1450,5 +1465,5 @@ void sabre_init(struct device_node *dp, 
+ 	/*
+ 	 * Look for APB underneath.
+ 	 */
+-	sabre_pbm_init(p, dp, vdma[0]);
++	sabre_pbm_init(p, dp, vdma[0], vdma[0] + vdma[1]);
+ }
diff --git a/queue-2.6.18/tcp-cubic-scaling-error.patch b/queue-2.6.18/tcp-cubic-scaling-error.patch
new file mode 100644
index 00000000000..ee194368791
--- /dev/null
+++ b/queue-2.6.18/tcp-cubic-scaling-error.patch
@@ -0,0 +1,56 @@
+From stable-bounces@linux.kernel.org  Mon Oct 30 14:50:53 2006
+Date: Mon, 30 Oct 2006 14:47:35 -0800
+From: Stephen Hemminger <shemminger@osdl.org>
+To: stable@kernel.org
+Subject: tcp: cubic scaling error
+
+Doug Leith observed a discrepancy between the version of CUBIC described
+in the papers and the version in 2.6.18. A math error related to scaling
+causes Cubic to grow too slowly.
+
+Patch is from "Sangtae Ha" <sha2@ncsu.edu>. I validated that
+it does fix the problems.
+
+See the following to show behavior over 500ms 100 Mbit link.
+
+Sender (2.6.19-rc3) ---  Bridge (2.6.18-rt7) ------- Receiver (2.6.19-rc3)
+                    1G      [netem]           100M
+
+	http://developer.osdl.org/shemminger/tcp/2.6.19-rc3/cubic-orig.png
+	http://developer.osdl.org/shemminger/tcp/2.6.19-rc3/cubic-fix.png
+
+Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv4/tcp_cubic.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- linux-2.6.18.1.orig/net/ipv4/tcp_cubic.c
++++ linux-2.6.18.1/net/ipv4/tcp_cubic.c
+@@ -190,7 +190,7 @@ static inline void bictcp_update(struct 
+          */
+ 
+ 	/* change the unit from HZ to bictcp_HZ */
+-        t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start)
++        t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start)
+ 	     << BICTCP_HZ) / HZ;
+ 
+         if (t < ca->bic_K)		/* t - K */
+@@ -259,7 +259,7 @@ static inline void measure_delay(struct 
+ 	    (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+ 		return;
+ 
+-	delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
++	delay = (tcp_time_stamp - tp->rx_opt.rcv_tsecr)<<3;
+ 	if (delay == 0)
+ 		delay = 1;
+ 
+@@ -366,7 +366,7 @@ static int __init cubictcp_register(void
+ 
+ 	beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
+ 
+-	cube_rtt_scale = (bic_scale << 3) / 10;	/* 1024*c/rtt */
++	cube_rtt_scale = (bic_scale * 10);	/* 1024*c/rtt */
+ 
+ 	/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
+ 	 *  so K = cubic_root( (wmax-cwnd)*rtt/c )
diff --git a/queue-2.6.18/use-min-of-two-prio-settings-in-calculating-distress-for-reclaim.patch b/queue-2.6.18/use-min-of-two-prio-settings-in-calculating-distress-for-reclaim.patch
new file mode 100644
index 00000000000..62b44783b35
--- /dev/null
+++ b/queue-2.6.18/use-min-of-two-prio-settings-in-calculating-distress-for-reclaim.patch
@@ -0,0 +1,67 @@
+From bbdb396a60b2ebf7de3b717991e5d3e28c8b7bbd Mon Sep 17 00:00:00 2001
+From: Martin Bligh <mbligh@google.com>
+Date: Sat, 28 Oct 2006 10:38:25 -0700
+Subject: Use min of two prio settings in calculating distress for reclaim
+
+If try_to_free_pages / balance_pgdat are called with a gfp_mask specifying
+GFP_IO and/or GFP_FS, they will reclaim the requisite number of pages, and then
+reset prev_priority to DEF_PRIORITY (or to some other high (ie: unurgent)
+value).
+
+However, another reclaimer without those gfp_mask flags set (say, GFP_NOIO)
+may still be struggling to reclaim pages.  The concurrent overwrite of
+zone->prev_priority will cause this GFP_NOIO thread to unexpectedly cease
+deactivating mapped pages, thus causing reclaim difficulties.
+
+The fix is to key the distress calculation not only off zone->prev_priority, but
+also to take into account the local caller's priority by using
+min(zone->prev_priority, priority).
+
+Signed-off-by: Martin J. Bligh <mbligh@google.com>
+Cc: Nick Piggin <nickpiggin@yahoo.com.au>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ mm/vmscan.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- linux-2.6.18.1.orig/mm/vmscan.c
++++ linux-2.6.18.1/mm/vmscan.c
+@@ -727,7 +727,7 @@ static inline void note_zone_scanning_pr
+  * But we had to alter page->flags anyway.
+  */
+ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
+-				struct scan_control *sc)
++				struct scan_control *sc, int priority)
+ {
+ 	unsigned long pgmoved;
+ 	int pgdeactivate = 0;
+@@ -748,7 +748,7 @@ static void shrink_active_list(unsigned 
+ 		 * `distress' is a measure of how much trouble we're having
+ 		 * reclaiming pages.  0 -> no problems.  100 -> great trouble.
+ 		 */
+-		distress = 100 >> zone->prev_priority;
++		distress = 100 >> min(zone->prev_priority, priority);
+ 
+ 		/*
+ 		 * The point of this algorithm is to decide when to start
+@@ -899,7 +899,7 @@ static unsigned long shrink_zone(int pri
+ 			nr_to_scan = min(nr_active,
+ 					(unsigned long)sc->swap_cluster_max);
+ 			nr_active -= nr_to_scan;
+-			shrink_active_list(nr_to_scan, zone, sc);
++			shrink_active_list(nr_to_scan, zone, sc, priority);
+ 		}
+ 
+ 		if (nr_inactive) {
+@@ -1341,7 +1341,7 @@ static unsigned long shrink_all_zones(un
+ 			if (zone->nr_scan_active >= nr_pages || pass > 3) {
+ 				zone->nr_scan_active = 0;
+ 				nr_to_scan = min(nr_pages, zone->nr_active);
+-				shrink_active_list(nr_to_scan, zone, sc);
++				shrink_active_list(nr_to_scan, zone, sc, prio);
+ 			}
+ 		}
+ 
diff --git a/queue-2.6.18/vmscan-fix-temp_priority-race.patch b/queue-2.6.18/vmscan-fix-temp_priority-race.patch
new file mode 100644
index 00000000000..593778dcfbe
--- /dev/null
+++ b/queue-2.6.18/vmscan-fix-temp_priority-race.patch
@@ -0,0 +1,221 @@
+From 3bb1a852ab6c9cdf211a2f4a2f502340c8c38eca Mon Sep 17 00:00:00 2001
+From: Martin Bligh <mbligh@mbligh.org>
+Date: Sat, 28 Oct 2006 10:38:24 -0700
+Subject: vmscan: Fix temp_priority race
+
+The temp_priority field in zone is racy, as we can walk through a reclaim
+path, and just before we copy it into prev_priority, it can be overwritten
+(say with DEF_PRIORITY) by another reclaimer.
+
+The same bug is contained in both try_to_free_pages and balance_pgdat, but
+it is fixed slightly differently.  In balance_pgdat, we keep a separate
+priority record per zone in a local array.  In try_to_free_pages there is
+no need to do this, as the priority level is the same for all zones that we
+reclaim from.
+
+Impact of this bug is that temp_priority is copied into prev_priority, and
+setting this artificially high causes reclaimers to set distress
+artificially low.  They then fail to reclaim mapped pages, when they are,
+in fact, under severe memory pressure (their priority may be as low as 0).
+This causes the OOM killer to fire incorrectly.
+
+From: Andrew Morton <akpm@osdl.org>
+
+__zone_reclaim() isn't modifying zone->prev_priority.  But zone->prev_priority
+is used in the decision whether or not to bring mapped pages onto the inactive
+list.  Hence there's a risk here that __zone_reclaim() will fail because
+zone->prev_priority is large (ie: low urgency) and lots of mapped pages end up
+stuck on the active list.
+
+Fix that up by decreasing (ie making more urgent) zone->prev_priority as
+__zone_reclaim() scans the zone's pages.
+
+This bug perhaps explains why ZONE_RECLAIM_PRIORITY was created.  It should be
+possible to remove that now, and to just start out at DEF_PRIORITY?
+
+Cc: Nick Piggin <nickpiggin@yahoo.com.au>
+Cc: Christoph Lameter <clameter@engr.sgi.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+[chrisw: minor wiggle to fit -stable]
+---
+ include/linux/mmzone.h |    6 -----
+ mm/page_alloc.c        |    2 -
+ mm/vmscan.c            |   55 ++++++++++++++++++++++++++++++++++++-------------
+ mm/vmstat.c            |    2 -
+ 4 files changed, 43 insertions(+), 22 deletions(-)
+
+--- linux-2.6.18.1.orig/include/linux/mmzone.h
++++ linux-2.6.18.1/include/linux/mmzone.h
+@@ -200,13 +200,9 @@ struct zone {
+ 	 * under - it drives the swappiness decision: whether to unmap mapped
+ 	 * pages.
+ 	 *
+-	 * temp_priority is used to remember the scanning priority at which
+-	 * this zone was successfully refilled to free_pages == pages_high.
+-	 *
+-	 * Access to both these fields is quite racy even on uniprocessor.  But
++	 * Access to this field is quite racy even on uniprocessor.  But
+ 	 * it is expected to average out OK.
+ 	 */
+-	int temp_priority;
+ 	int prev_priority;
+ 
+ 
+--- linux-2.6.18.1.orig/mm/page_alloc.c
++++ linux-2.6.18.1/mm/page_alloc.c
+@@ -2021,7 +2021,7 @@ static void __meminit free_area_init_cor
+ 		zone->zone_pgdat = pgdat;
+ 		zone->free_pages = 0;
+ 
+-		zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
++		zone->prev_priority = DEF_PRIORITY;
+ 
+ 		zone_pcp_init(zone);
+ 		INIT_LIST_HEAD(&zone->active_list);
+--- linux-2.6.18.1.orig/mm/vmscan.c
++++ linux-2.6.18.1/mm/vmscan.c
+@@ -696,6 +696,20 @@ done:
+ }
+ 
+ /*
++ * We are about to scan this zone at a certain priority level.  If that priority
++ * level is smaller (ie: more urgent) than the previous priority, then note
++ * that priority level within the zone.  This is done so that when the next
++ * process comes in to scan this zone, it will immediately start out at this
++ * priority level rather than having to build up its own scanning priority.
++ * Here, this priority affects only the reclaim-mapped threshold.
++ */
++static inline void note_zone_scanning_priority(struct zone *zone, int priority)
++{
++	if (priority < zone->prev_priority)
++		zone->prev_priority = priority;
++}
++
++/*
+  * This moves pages from the active list to the inactive list.
+  *
+  * We move them the other way if the page is referenced by one or more
+@@ -934,9 +948,7 @@ static unsigned long shrink_zones(int pr
+ 		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+ 			continue;
+ 
+-		zone->temp_priority = priority;
+-		if (zone->prev_priority > priority)
+-			zone->prev_priority = priority;
++		note_zone_scanning_priority(zone, priority);
+ 
+ 		if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ 			continue;	/* Let kswapd poll it */
+@@ -984,7 +996,6 @@ unsigned long try_to_free_pages(struct z
+ 		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+ 			continue;
+ 
+-		zone->temp_priority = DEF_PRIORITY;
+ 		lru_pages += zone->nr_active + zone->nr_inactive;
+ 	}
+ 
+@@ -1022,13 +1033,22 @@ unsigned long try_to_free_pages(struct z
+ 			blk_congestion_wait(WRITE, HZ/10);
+ 	}
+ out:
++	/*
++	 * Now that we've scanned all the zones at this priority level, note
++	 * that level within the zone so that the next thread which performs
++	 * scanning of this zone will immediately start out at this priority
++	 * level.  This affects only the decision whether or not to bring
++	 * mapped pages onto the inactive list.
++	 */
++	if (priority < 0)
++		priority = 0;
+ 	for (i = 0; zones[i] != 0; i++) {
+ 		struct zone *zone = zones[i];
+ 
+ 		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+ 			continue;
+ 
+-		zone->prev_priority = zone->temp_priority;
++		zone->prev_priority = priority;
+ 	}
+ 	return ret;
+ }
+@@ -1068,6 +1088,11 @@ static unsigned long balance_pgdat(pg_da
+ 		.swap_cluster_max = SWAP_CLUSTER_MAX,
+ 		.swappiness = vm_swappiness,
+ 	};
++	/*
++	 * temp_priority is used to remember the scanning priority at which
++	 * this zone was successfully refilled to free_pages == pages_high.
++	 */
++	int temp_priority[MAX_NR_ZONES];
+ 
+ loop_again:
+ 	total_scanned = 0;
+@@ -1075,11 +1100,8 @@ loop_again:
+ 	sc.may_writepage = !laptop_mode;
+ 	count_vm_event(PAGEOUTRUN);
+ 
+-	for (i = 0; i < pgdat->nr_zones; i++) {
+-		struct zone *zone = pgdat->node_zones + i;
+-
+-		zone->temp_priority = DEF_PRIORITY;
+-	}
++	for (i = 0; i < pgdat->nr_zones; i++)
++		temp_priority[i] = DEF_PRIORITY;
+ 
+ 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+ 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
+@@ -1140,10 +1162,9 @@ scan:
+ 			if (!zone_watermark_ok(zone, order, zone->pages_high,
+ 					       end_zone, 0))
+ 				all_zones_ok = 0;
+-			zone->temp_priority = priority;
+-			if (zone->prev_priority > priority)
+-				zone->prev_priority = priority;
++			temp_priority[i] = priority;
+ 			sc.nr_scanned = 0;
++			note_zone_scanning_priority(zone, priority);
+ 			nr_reclaimed += shrink_zone(priority, zone, &sc);
+ 			reclaim_state->reclaimed_slab = 0;
+ 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
+@@ -1183,10 +1204,15 @@ scan:
+ 			break;
+ 	}
+ out:
++	/*
++	 * Note within each zone the priority level at which this zone was
++	 * brought into a happy state.  So that the next thread which scans this
++	 * zone will start out at that priority level.
++	 */
+ 	for (i = 0; i < pgdat->nr_zones; i++) {
+ 		struct zone *zone = pgdat->node_zones + i;
+ 
+-		zone->prev_priority = zone->temp_priority;
++		zone->prev_priority = temp_priority[i];
+ 	}
+ 	if (!all_zones_ok) {
+ 		cond_resched();
+@@ -1570,6 +1596,7 @@ static int __zone_reclaim(struct zone *z
+ 		 */
+ 		priority = ZONE_RECLAIM_PRIORITY;
+ 		do {
++			note_zone_scanning_priority(zone, priority);
+ 			nr_reclaimed += shrink_zone(priority, zone, &sc);
+ 			priority--;
+ 		} while (priority >= 0 && nr_reclaimed < nr_pages);
+--- linux-2.6.18.1.orig/mm/vmstat.c
++++ linux-2.6.18.1/mm/vmstat.c
+@@ -586,11 +586,9 @@ static int zoneinfo_show(struct seq_file
+ 		seq_printf(m,
+ 			   "\n  all_unreclaimable: %u"
+ 			   "\n  prev_priority:     %i"
+-			   "\n  temp_priority:     %i"
+ 			   "\n  start_pfn:         %lu",
+ 			   zone->all_unreclaimable,
+ 			   zone->prev_priority,
+-			   zone->temp_priority,
+ 			   zone->zone_start_pfn);
+ 		spin_unlock_irqrestore(&zone->lock, flags);
+ 		seq_putc(m, '\n');