--- /dev/null
+From stable-bounces@linux.kernel.org Mon Oct 30 17:54:57 2006
+Date: Tue, 31 Oct 2006 12:51:45 +1100
+From: NeilBrown <neilb@suse.de>
+To: Andrew Morton <akpm@osdl.org>
+Cc: stable@kernel.org, Jens Axboe <jens.axboe@oracle.com>
+Subject: [stable] [PATCH] Check bio address after mapping through partitions.
+
+Partitions are not limited to live within a device. So
+we should range check after partition mapping.
+
+Note that 'maxsector' was being used for two different things. I have
+split off the second usage into 'old_sector' so that maxsector can
+still be used for its primary usage later in the function.
+
+Cc: Jens Axboe <jens.axboe@oracle.com>
+Signed-off-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ block/ll_rw_blk.c | 24 ++++++++++++++++++++----
+ 1 file changed, 20 insertions(+), 4 deletions(-)
+
+--- linux-2.6.18.1.orig/block/ll_rw_blk.c
++++ linux-2.6.18.1/block/ll_rw_blk.c
+@@ -3021,6 +3021,7 @@ void generic_make_request(struct bio *bi
+ {
+ request_queue_t *q;
+ sector_t maxsector;
++ sector_t old_sector;
+ int ret, nr_sectors = bio_sectors(bio);
+ dev_t old_dev;
+
+@@ -3049,7 +3050,7 @@ void generic_make_request(struct bio *bi
+ * NOTE: we don't repeat the blk_size check for each new device.
+ * Stacking drivers are expected to know what they are doing.
+ */
+- maxsector = -1;
++ old_sector = -1;
+ old_dev = 0;
+ do {
+ char b[BDEVNAME_SIZE];
+@@ -3083,15 +3084,30 @@ end_io:
+ */
+ blk_partition_remap(bio);
+
+- if (maxsector != -1)
++ if (old_sector != -1)
+ blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
+- maxsector);
++ old_sector);
+
+ blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+
+- maxsector = bio->bi_sector;
++ old_sector = bio->bi_sector;
+ old_dev = bio->bi_bdev->bd_dev;
+
++ maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
++ if (maxsector) {
++ sector_t sector = bio->bi_sector;
++
++ if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
++ /*
++ * This may well happen - partitions are not checked
++ * to make sure they are within the size of the
++ * whole device.
++ */
++ handle_bad_sector(bio);
++ goto end_io;
++ }
++ }
++
+ ret = q->make_request_fn(q, bio);
+ } while (ret);
+ }
--- /dev/null
+From fca178c0c6e8d52a1875be36b070f30884ebfae9 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@tv-sign.ru>
+Date: Sat, 28 Oct 2006 10:38:49 -0700
+Subject: fill_tgid: fix task_struct leak and possible oops
+
+1. fill_tgid() forgets to do put_task_struct(first).
+
+2. release_task(first) can happen after fill_tgid() drops tasklist_lock,
+   so it is unsafe to dereference first->signal.
+
+This is a temporary fix, imho the locking should be reworked.
+
+Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
+Cc: Shailabh Nagar <nagar@watson.ibm.com>
+Cc: Balbir Singh <balbir@in.ibm.com>
+Cc: Jay Lan <jlan@sgi.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ kernel/taskstats.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- linux-2.6.18.1.orig/kernel/taskstats.c
++++ linux-2.6.18.1/kernel/taskstats.c
+@@ -229,14 +229,17 @@ static int fill_tgid(pid_t tgid, struct
+ } else
+ get_task_struct(first);
+
+- /* Start with stats from dead tasks */
+- spin_lock_irqsave(&first->signal->stats_lock, flags);
+- if (first->signal->stats)
+- memcpy(stats, first->signal->stats, sizeof(*stats));
+- spin_unlock_irqrestore(&first->signal->stats_lock, flags);
+
+ tsk = first;
+ read_lock(&tasklist_lock);
++ /* Start with stats from dead tasks */
++ if (first->signal) {
++ spin_lock_irqsave(&first->signal->stats_lock, flags);
++ if (first->signal->stats)
++ memcpy(stats, first->signal->stats, sizeof(*stats));
++ spin_unlock_irqrestore(&first->signal->stats_lock, flags);
++ }
++
+ do {
+ if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk))
+ continue;
+@@ -256,7 +259,7 @@ static int fill_tgid(pid_t tgid, struct
+ * Accounting subsytems can also add calls here to modify
+ * fields of taskstats.
+ */
+-
++ put_task_struct(first);
+ return 0;
+ }
+
--- /dev/null
+From stable-bounces@linux.kernel.org Mon Oct 30 15:14:51 2006
+Date: Mon, 30 Oct 2006 15:11:21 -0800 (PST)
+From: David Miller <davem@davemloft.net>
+To: stable@kernel.org
+Cc: bunk@stusta.de
+Subject: IPV6: fix lockup via /proc/net/ip6_flowlabel
+
+From: James Morris <jmorris@namei.org>
+
+There's a bug in the seqfile handling for /proc/net/ip6_flowlabel, where,
+after finding a flowlabel, the code will loop forever not finding any
+further flowlabels, first traversing the rest of the hash bucket then just
+looping.
+
+This patch fixes the problem by breaking after the hash bucket has been
+traversed.
+
+Note that this bug can cause lockups and oopses, and is trivially invoked
+by an unprivileged user.
+
+Signed-off-by: James Morris <jmorris@namei.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv6/ip6_flowlabel.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- linux-2.6.18.1.orig/net/ipv6/ip6_flowlabel.c
++++ linux-2.6.18.1/net/ipv6/ip6_flowlabel.c
+@@ -587,6 +587,8 @@ static struct ip6_flowlabel *ip6fl_get_n
+ while (!fl) {
+ if (++state->bucket <= FL_HASH_MASK)
+ fl = fl_ht[state->bucket];
++ else
++ break;
+ }
+ return fl;
+ }
--- /dev/null
+From 04518bfe8eac2e82b476fb2b0093527adc2bc791 Mon Sep 17 00:00:00 2001
+From: Jeff Garzik <jeff@garzik.org>
+Date: Tue, 17 Oct 2006 00:10:39 -0700
+Subject: ISDN: fix drivers, by handling errors thrown by ->readstat()
+
+This is a particularly ugly on-failure bug, possibly security, since the
+lack of error handling here is covering up another class of bug: failure to
+handle copy_to_user() return values.
+
+The I4L API function ->readstat() returns an integer, and by looking at
+several existing driver implementations, it is clear that a negative return
+value was meant to indicate an error.
+
+Given that several drivers already return a negative value indicating an
+errno-style error, the current code would blindly accept that [negative]
+value as a valid amount of bytes read. Obvious damage ensues.
+
+Correcting ->readstat() handling to properly notice errors fixes the
+existing code to work correctly on error, and enables future patches to
+more easily indicate errors during operation.
+
+Signed-off-by: Jeff Garzik <jeff@garzik.org>
+Cc: Karsten Keil <kkeil@suse.de>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ drivers/isdn/i4l/isdn_common.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- linux-2.6.18.1.orig/drivers/isdn/i4l/isdn_common.c
++++ linux-2.6.18.1/drivers/isdn/i4l/isdn_common.c
+@@ -1134,9 +1134,12 @@ isdn_read(struct file *file, char __user
+ if (dev->drv[drvidx]->interface->readstat) {
+ if (count > dev->drv[drvidx]->stavail)
+ count = dev->drv[drvidx]->stavail;
+- len = dev->drv[drvidx]->interface->
+- readstat(buf, count, drvidx,
+- isdn_minor2chan(minor));
++ len = dev->drv[drvidx]->interface->readstat(buf, count,
++ drvidx, isdn_minor2chan(minor));
++ if (len < 0) {
++ retval = len;
++ goto out;
++ }
+ } else {
+ len = 0;
+ }
--- /dev/null
+From c333526f489044be2b93085720eb898f0037b346 Mon Sep 17 00:00:00 2001
+From: Alan Cox <alan@lxorguk.ukuu.org.uk>
+Date: Sat, 28 Oct 2006 10:38:57 -0700
+Subject: JMB 368 PATA detection
+
+The Jmicron JMB368 is PATA only, so it has the PATA on function zero.
+Therefore, don't skip function zero on this device when probing.
+
+Signed-off-by: Alan Cox <alan@redhat.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ drivers/ide/pci/generic.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- linux-2.6.18.1.orig/drivers/ide/pci/generic.c
++++ linux-2.6.18.1/drivers/ide/pci/generic.c
+@@ -242,8 +242,10 @@ static int __devinit generic_init_one(st
+ (!(PCI_FUNC(dev->devfn) & 1)))
+ goto out;
+
+- if (dev->vendor == PCI_VENDOR_ID_JMICRON && PCI_FUNC(dev->devfn) != 1)
+- goto out;
++ if (dev->vendor == PCI_VENDOR_ID_JMICRON) {
++ if (dev->device != PCI_DEVICE_ID_JMICRON_JMB368 && PCI_FUNC(dev->devfn) != 1)
++ goto out;
++ }
+
+ if (dev->vendor != PCI_VENDOR_ID_JMICRON) {
+ pci_read_config_word(dev, PCI_COMMAND, &command);
--- /dev/null
+From fd6840714d9cf6e93f1d42b904860a94df316b85 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 5 Sep 2006 12:27:44 -0400
+Subject: NFS: nfs_lookup - don't hash dentry when optimising away the lookup
+
+If the open intents tell us that a given lookup is going to result in an
+exclusive create, we currently optimize away the lookup call itself. The
+reason is that the lookup would not be atomic with the create RPC call, so
+why do it in the first place?
+
+A problem occurs, however, if the VFS aborts the exclusive create operation
+after the lookup, but before the call to create the file/directory: in this
+case we will end up with a hashed negative dentry in the dcache that has
+never been looked up.
+Fix this by only actually hashing the dentry once the create operation has
+been successfully completed.
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ fs/nfs/dir.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- linux-2.6.18.1.orig/fs/nfs/dir.c
++++ linux-2.6.18.1/fs/nfs/dir.c
+@@ -902,9 +902,15 @@ static struct dentry *nfs_lookup(struct
+
+ lock_kernel();
+
+- /* If we're doing an exclusive create, optimize away the lookup */
+- if (nfs_is_exclusive_create(dir, nd))
+- goto no_entry;
++ /*
++ * If we're doing an exclusive create, optimize away the lookup
++ * but don't hash the dentry.
++ */
++ if (nfs_is_exclusive_create(dir, nd)) {
++ d_instantiate(dentry, NULL);
++ res = NULL;
++ goto out_unlock;
++ }
+
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+ if (error == -ENOENT)
+@@ -1156,6 +1162,8 @@ int nfs_instantiate(struct dentry *dentr
+ if (IS_ERR(inode))
+ goto out_err;
+ d_instantiate(dentry, inode);
++ if (d_unhashed(dentry))
++ d_rehash(dentry);
+ return 0;
+ out_err:
+ d_drop(dentry);
--- /dev/null
+From 3560cc5ec3488b20d927f7160a21a0df1d1fda20 Mon Sep 17 00:00:00 2001
+From: Karsten Wiese <annabellesgarden@yahoo.de>
+Date: Fri, 20 Oct 2006 14:45:36 -0700
+Subject: PCI: Remove quirk_via_abnormal_poweroff
+
+My K8T800 mobo resumes fine from suspend to ram with and without patch
+applied against 2.6.18.
+
+quirk_via_abnormal_poweroff makes some boards not boot 2.6.18, so IMO patch
+should go to head, 2.6.18.2 and everywhere "ACPI: ACPICA 20060623" has been
+applied.
+
+
+Remove quirk_via_abnormal_poweroff
+
+Obsoleted by "ACPI: ACPICA 20060623":
+<snip>
+ Implemented support for "ignored" bits in the ACPI
+ registers. According to the ACPI specification, these
+ bits should be preserved when writing the registers via
+ a read/modify/write cycle. There are 3 bits preserved
+ in this manner: PM1_CONTROL[0] (SCI_EN), PM1_CONTROL[9],
+ and PM1_STATUS[11].
+ http://bugzilla.kernel.org/show_bug.cgi?id=3691
+</snip>
+
+Signed-off-by: Karsten Wiese <fzu@wemgehoertderstaat.de>
+Cc: Bob Moore <robert.moore@intel.com>
+Cc: "Brown, Len" <len.brown@intel.com>
+Acked-by: Dave Jones <davej@redhat.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ drivers/pci/quirks.c | 27 ---------------------------
+ 1 file changed, 27 deletions(-)
+
+--- linux-2.6.18.1.orig/drivers/pci/quirks.c
++++ linux-2.6.18.1/drivers/pci/quirks.c
+@@ -685,33 +685,6 @@ static void __devinit quirk_vt82c598_id(
+ }
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C597_0, quirk_vt82c598_id );
+
+-#ifdef CONFIG_ACPI_SLEEP
+-
+-/*
+- * Some VIA systems boot with the abnormal status flag set. This can cause
+- * the BIOS to re-POST the system on resume rather than passing control
+- * back to the OS. Clear the flag on boot
+- */
+-static void __devinit quirk_via_abnormal_poweroff(struct pci_dev *dev)
+-{
+- u32 reg;
+-
+- acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK, ACPI_REGISTER_PM1_STATUS,
+-			      &reg);
+-
+- if (reg & 0x800) {
+- printk("Clearing abnormal poweroff flag\n");
+- acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
+- ACPI_REGISTER_PM1_STATUS,
+- (u16)0x800);
+- }
+-}
+-
+-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, quirk_via_abnormal_poweroff);
+-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_abnormal_poweroff);
+-
+-#endif
+-
+ /*
+ * CardBus controllers have a legacy base address that enables them
+ * to respond as i82365 pcmcia controllers. We don't want them to
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Oct 17 00:12:55 2006
+Date: Tue, 17 Oct 2006 00:09:39 -0700
+From: akpm@osdl.org
+To: torvalds@osdl.org
+Cc: akpm@osdl.org, dwalker@mvista.com, pmattis@google.com, johnstul@us.ibm.com, toyoa@mvista.com, stable@kernel.org, zippel@linux-m68k.org, mbligh@google.com, spark@google.com, rohitseth@google.com, tglx@linutronix.de, mingo@elte.hu, roland@redhat.com
+Subject: posix-cpu-timers: prevent signal delivery starvation
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+The integer divisions in the timer accounting code can round the result
+down to 0. Adding 0 is without effect and the signal delivery stops.
+
+Clamp the division result to minimum 1 to avoid this.
+
+Problem was reported by Seongbae Park <spark@google.com>, who provided
+also an initial patch.
+
+Roland sayeth:
+
+ I have had some more time to think about the problem, and to reproduce it
+ using Toyo's test case. For the record, if my understanding of the problem
+ is correct, this happens only in one very particular case. First, the
+ expiry time has to be so soon that in cputime_t units (usually 1s/HZ ticks)
+ it's < nthreads so the division yields zero. Second, it only affects each
+ thread that is so new that its CPU time accumulation is zero so now+0 is
+ still zero and ->it_*_expires winds up staying zero. For the VIRT and PROF
+ clocks when cputime_t is tick granularity (or the SCHED clock on
+ configurations where sched_clock's value only advances on clock ticks), this
+ is not hard to arrange with new threads starting up and blocking before they
+ accumulate a whole tick of CPU time. That's what happens in Toyo's test
+ case.
+
+ Note that in general it is fine for that division to round down to zero,
+ and set each thread's expiry time to its "now" time. The problem only
+ arises with threads whose "now" value is still zero, so that now+0 winds up
+ 0 and is interpreted as "not set" instead of ">= now". So it would be a
+ sufficient and more precise fix to just use max(ticks, 1) inside the loop
+ when setting each it_*_expires value.
+
+ But, it does no harm to round the division up to one and always advance
+ every thread's expiry time. If the thread didn't already fire timers for
+ the expiry time of "now", there is no expectation that it will do so before
+ the next tick anyway. So I followed Thomas's patch in lifting the max out
+ of the loops.
+
+ This patch also covers the reload cases, which are harder to write a test
+ for (and I didn't try). I've tested it with Toyo's case and it fixes that.
+
+
+[toyoa@mvista.com: fix: min_t -> max_t]
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Roland McGrath <roland@redhat.com>
+Cc: Daniel Walker <dwalker@mvista.com>
+Cc: Toyo Abe <toyoa@mvista.com>
+Cc: john stultz <johnstul@us.ibm.com>
+Cc: Roman Zippel <zippel@linux-m68k.org>
+Cc: Seongbae Park <spark@google.com>
+Cc: Peter Mattis <pmattis@google.com>
+Cc: Rohit Seth <rohitseth@google.com>
+Cc: Martin Bligh <mbligh@google.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+
+ kernel/posix-cpu-timers.c | 27 +++++++++++++++++++++------
+ 1 file changed, 21 insertions(+), 6 deletions(-)
+
+--- linux-2.6.18.1.orig/kernel/posix-cpu-timers.c
++++ linux-2.6.18.1/kernel/posix-cpu-timers.c
+@@ -88,6 +88,19 @@ static inline union cpu_time_count cpu_t
+ }
+
+ /*
++ * Divide and limit the result to res >= 1
++ *
++ * This is necessary to prevent signal delivery starvation, when the result of
++ * the division would be rounded down to 0.
++ */
++static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
++{
++ cputime_t res = cputime_div(time, div);
++
++ return max_t(cputime_t, res, 1);
++}
++
++/*
+ * Update expiry time from increment, and increase overrun count,
+ * given the current clock sample.
+ */
+@@ -483,8 +496,8 @@ static void process_timer_rebalance(stru
+ BUG();
+ break;
+ case CPUCLOCK_PROF:
+- left = cputime_div(cputime_sub(expires.cpu, val.cpu),
+- nthreads);
++ left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
++ nthreads);
+ do {
+ if (likely(!(t->flags & PF_EXITING))) {
+ ticks = cputime_add(prof_ticks(t), left);
+@@ -498,8 +511,8 @@ static void process_timer_rebalance(stru
+ } while (t != p);
+ break;
+ case CPUCLOCK_VIRT:
+- left = cputime_div(cputime_sub(expires.cpu, val.cpu),
+- nthreads);
++ left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
++ nthreads);
+ do {
+ if (likely(!(t->flags & PF_EXITING))) {
+ ticks = cputime_add(virt_ticks(t), left);
+@@ -515,6 +528,7 @@ static void process_timer_rebalance(stru
+ case CPUCLOCK_SCHED:
+ nsleft = expires.sched - val.sched;
+ do_div(nsleft, nthreads);
++ nsleft = max_t(unsigned long long, nsleft, 1);
+ do {
+ if (likely(!(t->flags & PF_EXITING))) {
+ ns = t->sched_time + nsleft;
+@@ -1159,12 +1173,13 @@ static void check_process_timers(struct
+
+ prof_left = cputime_sub(prof_expires, utime);
+ prof_left = cputime_sub(prof_left, stime);
+- prof_left = cputime_div(prof_left, nthreads);
++ prof_left = cputime_div_non_zero(prof_left, nthreads);
+ virt_left = cputime_sub(virt_expires, utime);
+- virt_left = cputime_div(virt_left, nthreads);
++ virt_left = cputime_div_non_zero(virt_left, nthreads);
+ if (sched_expires) {
+ sched_left = sched_expires - sched_time;
+ do_div(sched_left, nthreads);
++ sched_left = max_t(unsigned long long, sched_left, 1);
+ } else {
+ sched_left = 0;
+ }
--- /dev/null
+From 7516795739bd53175629b90fab0ad488d7a6a9f7 Mon Sep 17 00:00:00 2001
+From: Andy Whitcroft <apw@shadowen.org>
+Date: Sat, 21 Oct 2006 10:24:14 -0700
+Subject: Reintroduce NODES_SPAN_OTHER_NODES for powerpc
+
+Revert "[PATCH] Remove SPAN_OTHER_NODES config definition"
+ This reverts commit f62859bb6871c5e4a8e591c60befc8caaf54db8c.
+Revert "[PATCH] mm: remove arch independent NODES_SPAN_OTHER_NODES"
+ This reverts commit a94b3ab7eab4edcc9b2cb474b188f774c331adf7.
+
+Also update the comments to indicate that this is still required
+and where its used.
+
+Signed-off-by: Andy Whitcroft <apw@shadowen.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Mike Kravetz <kravetz@us.ibm.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Acked-by: Mel Gorman <mel@csn.ul.ie>
+Acked-by: Will Schmidt <will_schmidt@vnet.ibm.com>
+Cc: Christoph Lameter <clameter@sgi.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ arch/powerpc/Kconfig | 9 +++++++++
+ arch/powerpc/configs/pseries_defconfig | 1 +
+ include/linux/mmzone.h | 6 ++++++
+ mm/page_alloc.c | 2 ++
+ 4 files changed, 18 insertions(+)
+
+--- linux-2.6.18.1.orig/arch/powerpc/Kconfig
++++ linux-2.6.18.1/arch/powerpc/Kconfig
+@@ -729,6 +729,15 @@ config ARCH_MEMORY_PROBE
+ def_bool y
+ depends on MEMORY_HOTPLUG
+
++# Some NUMA nodes have memory ranges that span
++# other nodes. Even though a pfn is valid and
++# between a node's start and end pfns, it may not
++# reside on that node. See memmap_init_zone()
++# for details.
++config NODES_SPAN_OTHER_NODES
++ def_bool y
++ depends on NEED_MULTIPLE_NODES
++
+ config PPC_64K_PAGES
+ bool "64k page size"
+ depends on PPC64
+--- linux-2.6.18.1.orig/arch/powerpc/configs/pseries_defconfig
++++ linux-2.6.18.1/arch/powerpc/configs/pseries_defconfig
+@@ -184,6 +184,7 @@ CONFIG_SPLIT_PTLOCK_CPUS=4
+ CONFIG_MIGRATION=y
+ CONFIG_RESOURCES_64BIT=y
+ CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
++CONFIG_NODES_SPAN_OTHER_NODES=y
+ # CONFIG_PPC_64K_PAGES is not set
+ CONFIG_SCHED_SMT=y
+ CONFIG_PROC_DEVICETREE=y
+--- linux-2.6.18.1.orig/include/linux/mmzone.h
++++ linux-2.6.18.1/include/linux/mmzone.h
+@@ -632,6 +632,12 @@ void sparse_init(void);
+ #define sparse_index_init(_sec, _nid) do {} while (0)
+ #endif /* CONFIG_SPARSEMEM */
+
++#ifdef CONFIG_NODES_SPAN_OTHER_NODES
++#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid))
++#else
++#define early_pfn_in_nid(pfn, nid) (1)
++#endif
++
+ #ifndef early_pfn_valid
+ #define early_pfn_valid(pfn) (1)
+ #endif
+--- linux-2.6.18.1.orig/mm/page_alloc.c
++++ linux-2.6.18.1/mm/page_alloc.c
+@@ -1673,6 +1673,8 @@ void __meminit memmap_init_zone(unsigned
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ if (!early_pfn_valid(pfn))
+ continue;
++ if (!early_pfn_in_nid(pfn, nid))
++ continue;
+ page = pfn_to_page(pfn);
+ set_page_links(page, zone, nid, pfn);
+ init_page_count(page);
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Oct 17 00:12:18 2006
+Date: Tue, 17 Oct 2006 00:09:53 -0700
+From: akpm@osdl.org
+To: torvalds@osdl.org
+Cc: akpm@osdl.org, a.zummo@towertech.it, flarramendi@gmail.com, raph@raphnet.net, stable@kernel.org
+Subject: rtc-max6902: month conversion fix
+
+From: Francisco Larramendi <flarramendi@gmail.com>
+
+Fix October-only BCD-to-binary conversion bug:
+
+ 0x08 -> 7
+ 0x09 -> 8
+ 0x10 -> 15 (!)
+ 0x11 -> 19
+
+Fixes http://bugzilla.kernel.org/show_bug.cgi?id=7361
+
+Cc: Raphael Assenat <raph@raphnet.net>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+
+ drivers/rtc/rtc-max6902.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- linux-2.6.18.1.orig/drivers/rtc/rtc-max6902.c
++++ linux-2.6.18.1/drivers/rtc/rtc-max6902.c
+@@ -137,7 +137,7 @@ static int max6902_get_datetime(struct d
+ dt->tm_min = BCD2BIN(chip->buf[2]);
+ dt->tm_hour = BCD2BIN(chip->buf[3]);
+ dt->tm_mday = BCD2BIN(chip->buf[4]);
+- dt->tm_mon = BCD2BIN(chip->buf[5] - 1);
++ dt->tm_mon = BCD2BIN(chip->buf[5]) - 1;
+ dt->tm_wday = BCD2BIN(chip->buf[6]);
+ dt->tm_year = BCD2BIN(chip->buf[7]);
+
alsa-snd_rtctimer-handle-rtc-interrupts-with-a-tasklet.patch
watchdog-sc1200wdt-fix-missing-pnp_unregister_driver.patch
fix-intel-rng-detection.patch
+posix-cpu-timers-prevent-signal-delivery-starvation.patch
+rtc-max6902-month-conversion-fix.patch
+isdn-fix-drivers-by-handling-errors-thrown-by-readstat.patch
+sparc64-fix-pci-memory-space-root-resource-on-hummingbird.patch
+pci-remove-quirk_via_abnormal_poweroff.patch
+reintroduce-nodes_span_other_nodes-for-powerpc.patch
+nfs-nfs_lookup-don-t-hash-dentry-when-optimising-away-the-lookup.patch
+vmscan-fix-temp_priority-race.patch
+use-min-of-two-prio-settings-in-calculating-distress-for-reclaim.patch
+fill_tgid-fix-task_struct-leak-and-possible-oops.patch
+jmb-368-pata-detection.patch
+tcp-cubic-scaling-error.patch
+ipv6-fix-lockup-via-proc-net-ip6_flowlabel.patch
+check-bio-address-after-mapping-through-partitions.patch
--- /dev/null
+From stable-bounces@linux.kernel.org Wed Oct 18 13:40:37 2006
+Date: Wed, 18 Oct 2006 13:38:49 -0700 (PDT)
+From: David Miller <davem@davemloft.net>
+To: stable@kernel.org
+Subject: SPARC64: Fix PCI memory space root resource on Hummingbird.
+
+For Hummingbird PCI controllers, we should create the root
+PCI memory space resource as the full 4GB area, and then
+allocate the IOMMU DMA translation window out of there.
+
+The old code just assumed that the IOMMU DMA translation base
+to the top of the 4GB area was unusable. This is not true on
+many systems such as SB100 and SB150, where the IOMMU DMA
+translation window sits at 0xc0000000->0xdfffffff.
+
+So what would happen is that any device mapped by the firmware
+at the top section 0xe0000000->0xffffffff would get remapped
+by Linux somewhere else leading to all kinds of problems and
+boot failures.
+
+While we're here, report more cases of OBP resource assignment
+conflicts. The only truly valid ones are ROM resource conflicts.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ arch/sparc64/kernel/pci_common.c | 29 ++++++++++-------------------
+ arch/sparc64/kernel/pci_sabre.c | 23 +++++++++++++++++++----
+ 2 files changed, 29 insertions(+), 23 deletions(-)
+
+--- linux-2.6.18.1.orig/arch/sparc64/kernel/pci_common.c
++++ linux-2.6.18.1/arch/sparc64/kernel/pci_common.c
+@@ -330,19 +330,6 @@ __init get_device_resource(struct linux_
+ return res;
+ }
+
+-static int __init pdev_resource_collisions_expected(struct pci_dev *pdev)
+-{
+- if (pdev->vendor != PCI_VENDOR_ID_SUN)
+- return 0;
+-
+- if (pdev->device == PCI_DEVICE_ID_SUN_RIO_EBUS ||
+- pdev->device == PCI_DEVICE_ID_SUN_RIO_1394 ||
+- pdev->device == PCI_DEVICE_ID_SUN_RIO_USB)
+- return 1;
+-
+- return 0;
+-}
+-
+ static void __init pdev_record_assignments(struct pci_pbm_info *pbm,
+ struct pci_dev *pdev)
+ {
+@@ -400,19 +387,23 @@ static void __init pdev_record_assignmen
+ pbm->parent->resource_adjust(pdev, res, root);
+
+ if (request_resource(root, res) < 0) {
++ int rnum;
++
+ /* OK, there is some conflict. But this is fine
+ * since we'll reassign it in the fixup pass.
+ *
+- * We notify the user that OBP made an error if it
+- * is a case we don't expect.
++ * Do not print the warning for ROM resources
++ * as such a conflict is quite common and
++ * harmless as the ROM bar is disabled.
+ */
+- if (!pdev_resource_collisions_expected(pdev)) {
+- printk(KERN_ERR "PCI: Address space collision on region %ld "
++ rnum = (res - &pdev->resource[0]);
++ if (rnum != PCI_ROM_RESOURCE)
++ printk(KERN_ERR "PCI: Resource collision, "
++ "region %d "
+ "[%016lx:%016lx] of device %s\n",
+- (res - &pdev->resource[0]),
++ rnum,
+ res->start, res->end,
+ pci_name(pdev));
+- }
+ }
+ }
+ }
+--- linux-2.6.18.1.orig/arch/sparc64/kernel/pci_sabre.c
++++ linux-2.6.18.1/arch/sparc64/kernel/pci_sabre.c
+@@ -1196,7 +1196,7 @@ static void pbm_register_toplevel_resour
+ &pbm->mem_space);
+ }
+
+-static void sabre_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 dma_begin)
++static void sabre_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 dma_start, u32 dma_end)
+ {
+ struct pci_pbm_info *pbm;
+ struct device_node *node;
+@@ -1261,6 +1261,8 @@ static void sabre_pbm_init(struct pci_co
+ node = node->sibling;
+ }
+ if (simbas_found == 0) {
++ struct resource *rp;
++
+ /* No APBs underneath, probably this is a hummingbird
+ * system.
+ */
+@@ -1302,8 +1304,10 @@ static void sabre_pbm_init(struct pci_co
+ pbm->io_space.end = pbm->io_space.start + (1UL << 24) - 1UL;
+ pbm->io_space.flags = IORESOURCE_IO;
+
+- pbm->mem_space.start = p->pbm_A.controller_regs + SABRE_MEMSPACE;
+- pbm->mem_space.end = pbm->mem_space.start + (unsigned long)dma_begin - 1UL;
++ pbm->mem_space.start =
++ (p->pbm_A.controller_regs + SABRE_MEMSPACE);
++ pbm->mem_space.end =
++ (pbm->mem_space.start + ((1UL << 32UL) - 1UL));
+ pbm->mem_space.flags = IORESOURCE_MEM;
+
+ if (request_resource(&ioport_resource, &pbm->io_space) < 0) {
+@@ -1315,6 +1319,17 @@ static void sabre_pbm_init(struct pci_co
+ prom_halt();
+ }
+
++ rp = kmalloc(sizeof(*rp), GFP_KERNEL);
++ if (!rp) {
++ prom_printf("Cannot allocate IOMMU resource.\n");
++ prom_halt();
++ }
++ rp->name = "IOMMU";
++ rp->start = pbm->mem_space.start + (unsigned long) dma_start;
++ rp->end = pbm->mem_space.start + (unsigned long) dma_end - 1UL;
++ rp->flags = IORESOURCE_BUSY;
++ request_resource(&pbm->mem_space, rp);
++
+ pci_register_legacy_regions(&pbm->io_space,
+ &pbm->mem_space);
+ }
+@@ -1450,5 +1465,5 @@ void sabre_init(struct device_node *dp,
+ /*
+ * Look for APB underneath.
+ */
+- sabre_pbm_init(p, dp, vdma[0]);
++ sabre_pbm_init(p, dp, vdma[0], vdma[0] + vdma[1]);
+ }
--- /dev/null
+From stable-bounces@linux.kernel.org Mon Oct 30 14:50:53 2006
+Date: Mon, 30 Oct 2006 14:47:35 -0800
+From: Stephen Hemminger <shemminger@osdl.org>
+To: stable@kernel.org
+Subject: tcp: cubic scaling error
+
+Doug Leith observed a discrepancy between the version of CUBIC described
+in the papers and the version in 2.6.18. A math error related to scaling
+causes Cubic to grow too slowly.
+
+Patch is from "Sangtae Ha" <sha2@ncsu.edu>. I validated that
+it does fix the problems.
+
+See the following to show behavior over 500ms 100 Mbit link.
+
+Sender (2.6.19-rc3) --- Bridge (2.6.18-rt7) ------- Receiver (2.6.19-rc3)
+ 1G [netem] 100M
+
+ http://developer.osdl.org/shemminger/tcp/2.6.19-rc3/cubic-orig.png
+ http://developer.osdl.org/shemminger/tcp/2.6.19-rc3/cubic-fix.png
+
+Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ net/ipv4/tcp_cubic.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- linux-2.6.18.1.orig/net/ipv4/tcp_cubic.c
++++ linux-2.6.18.1/net/ipv4/tcp_cubic.c
+@@ -190,7 +190,7 @@ static inline void bictcp_update(struct
+ */
+
+ /* change the unit from HZ to bictcp_HZ */
+- t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start)
++ t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start)
+ << BICTCP_HZ) / HZ;
+
+ if (t < ca->bic_K) /* t - K */
+@@ -259,7 +259,7 @@ static inline void measure_delay(struct
+ (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+ return;
+
+- delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
++ delay = (tcp_time_stamp - tp->rx_opt.rcv_tsecr)<<3;
+ if (delay == 0)
+ delay = 1;
+
+@@ -366,7 +366,7 @@ static int __init cubictcp_register(void
+
+ beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
+
+- cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */
++ cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */
+
+ /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
+ * so K = cubic_root( (wmax-cwnd)*rtt/c )
--- /dev/null
+From bbdb396a60b2ebf7de3b717991e5d3e28c8b7bbd Mon Sep 17 00:00:00 2001
+From: Martin Bligh <mbligh@google.com>
+Date: Sat, 28 Oct 2006 10:38:25 -0700
+Subject: Use min of two prio settings in calculating distress for reclaim
+
+If try_to_free_pages / balance_pgdat are called with a gfp_mask specifying
+GFP_IO and/or GFP_FS, they will reclaim the requisite number of pages, and the
+reset prev_priority to DEF_PRIORITY (or to some other high (ie: unurgent)
+value).
+
+However, another reclaimer without those gfp_mask flags set (say, GFP_NOIO)
+may still be struggling to reclaim pages. The concurrent overwrite of
+zone->prev_priority will cause this GFP_NOIO thread to unexpectedly cease
+deactivating mapped pages, thus causing reclaim difficulties.
+
+The fix is to key the distress calculation not off zone->prev_priority, but
+also take into account the local caller's priority by using
+min(zone->prev_priority, sc->priority)
+
+Signed-off-by: Martin J. Bligh <mbligh@google.com>
+Cc: Nick Piggin <nickpiggin@yahoo.com.au>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ mm/vmscan.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- linux-2.6.18.1.orig/mm/vmscan.c
++++ linux-2.6.18.1/mm/vmscan.c
+@@ -727,7 +727,7 @@ static inline void note_zone_scanning_pr
+ * But we had to alter page->flags anyway.
+ */
+ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
+- struct scan_control *sc)
++ struct scan_control *sc, int priority)
+ {
+ unsigned long pgmoved;
+ int pgdeactivate = 0;
+@@ -748,7 +748,7 @@ static void shrink_active_list(unsigned
+ * `distress' is a measure of how much trouble we're having
+ * reclaiming pages. 0 -> no problems. 100 -> great trouble.
+ */
+- distress = 100 >> zone->prev_priority;
++ distress = 100 >> min(zone->prev_priority, priority);
+
+ /*
+ * The point of this algorithm is to decide when to start
+@@ -899,7 +899,7 @@ static unsigned long shrink_zone(int pri
+ nr_to_scan = min(nr_active,
+ (unsigned long)sc->swap_cluster_max);
+ nr_active -= nr_to_scan;
+- shrink_active_list(nr_to_scan, zone, sc);
++ shrink_active_list(nr_to_scan, zone, sc, priority);
+ }
+
+ if (nr_inactive) {
+@@ -1341,7 +1341,7 @@ static unsigned long shrink_all_zones(un
+ if (zone->nr_scan_active >= nr_pages || pass > 3) {
+ zone->nr_scan_active = 0;
+ nr_to_scan = min(nr_pages, zone->nr_active);
+- shrink_active_list(nr_to_scan, zone, sc);
++ shrink_active_list(nr_to_scan, zone, sc, prio);
+ }
+ }
+
--- /dev/null
+From 3bb1a852ab6c9cdf211a2f4a2f502340c8c38eca Mon Sep 17 00:00:00 2001
+From: Martin Bligh <mbligh@mbligh.org>
+Date: Sat, 28 Oct 2006 10:38:24 -0700
+Subject: vmscan: Fix temp_priority race
+
+The temp_priority field in zone is racy, as we can walk through a reclaim
+path, and just before we copy it into prev_priority, it can be overwritten
+(say with DEF_PRIORITY) by another reclaimer.
+
+The same bug is contained in both try_to_free_pages and balance_pgdat, but
+it is fixed slightly differently. In balance_pgdat, we keep a separate
+priority record per zone in a local array. In try_to_free_pages there is
+no need to do this, as the priority level is the same for all zones that we
+reclaim from.
+
+Impact of this bug is that temp_priority is copied into prev_priority, and
+setting this artificially high causes reclaimers to set distress
+artificially low. They then fail to reclaim mapped pages, when they are,
+in fact, under severe memory pressure (their priority may be as low as 0).
+This causes the OOM killer to fire incorrectly.
+
+From: Andrew Morton <akpm@osdl.org>
+
+__zone_reclaim() isn't modifying zone->prev_priority. But zone->prev_priority
+is used in the decision whether or not to bring mapped pages onto the inactive
+list. Hence there's a risk here that __zone_reclaim() will fail because
+zone->prev_priority is large (ie: low urgency) and lots of mapped pages end up
+stuck on the active list.
+
+Fix that up by decreasing (ie making more urgent) zone->prev_priority as
+__zone_reclaim() scans the zone's pages.
+
+This bug perhaps explains why ZONE_RECLAIM_PRIORITY was created. It should be
+possible to remove that now, and to just start out at DEF_PRIORITY?
+
+Cc: Nick Piggin <nickpiggin@yahoo.com.au>
+Cc: Christoph Lameter <clameter@engr.sgi.com>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+[chrisw: minor wiggle to fit -stable]
+---
+ include/linux/mmzone.h | 6 -----
+ mm/page_alloc.c | 2 -
+ mm/vmscan.c | 55 ++++++++++++++++++++++++++++++++++++-------------
+ mm/vmstat.c | 2 -
+ 4 files changed, 43 insertions(+), 22 deletions(-)
+
+--- linux-2.6.18.1.orig/include/linux/mmzone.h
++++ linux-2.6.18.1/include/linux/mmzone.h
+@@ -200,13 +200,9 @@ struct zone {
+ * under - it drives the swappiness decision: whether to unmap mapped
+ * pages.
+ *
+- * temp_priority is used to remember the scanning priority at which
+- * this zone was successfully refilled to free_pages == pages_high.
+- *
+- * Access to both these fields is quite racy even on uniprocessor. But
++ * Access to this field is quite racy even on uniprocessor. But
+ * it is expected to average out OK.
+ */
+- int temp_priority;
+ int prev_priority;
+
+
+--- linux-2.6.18.1.orig/mm/page_alloc.c
++++ linux-2.6.18.1/mm/page_alloc.c
+@@ -2021,7 +2021,7 @@ static void __meminit free_area_init_cor
+ zone->zone_pgdat = pgdat;
+ zone->free_pages = 0;
+
+- zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
++ zone->prev_priority = DEF_PRIORITY;
+
+ zone_pcp_init(zone);
+ INIT_LIST_HEAD(&zone->active_list);
+--- linux-2.6.18.1.orig/mm/vmscan.c
++++ linux-2.6.18.1/mm/vmscan.c
+@@ -696,6 +696,20 @@ done:
+ }
+
+ /*
++ * We are about to scan this zone at a certain priority level. If that priority
++ * level is smaller (ie: more urgent) than the previous priority, then note
++ * that priority level within the zone. This is done so that when the next
++ * process comes in to scan this zone, it will immediately start out at this
++ * priority level rather than having to build up its own scanning priority.
++ * Here, this priority affects only the reclaim-mapped threshold.
++ */
++static inline void note_zone_scanning_priority(struct zone *zone, int priority)
++{
++ if (priority < zone->prev_priority)
++ zone->prev_priority = priority;
++}
++
++/*
+ * This moves pages from the active list to the inactive list.
+ *
+ * We move them the other way if the page is referenced by one or more
+@@ -934,9 +948,7 @@ static unsigned long shrink_zones(int pr
+ if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+ continue;
+
+- zone->temp_priority = priority;
+- if (zone->prev_priority > priority)
+- zone->prev_priority = priority;
++ note_zone_scanning_priority(zone, priority);
+
+ if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ continue; /* Let kswapd poll it */
+@@ -984,7 +996,6 @@ unsigned long try_to_free_pages(struct z
+ if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+ continue;
+
+- zone->temp_priority = DEF_PRIORITY;
+ lru_pages += zone->nr_active + zone->nr_inactive;
+ }
+
+@@ -1022,13 +1033,22 @@ unsigned long try_to_free_pages(struct z
+ blk_congestion_wait(WRITE, HZ/10);
+ }
+ out:
++ /*
++ * Now that we've scanned all the zones at this priority level, note
++ * that level within the zone so that the next thread which performs
++ * scanning of this zone will immediately start out at this priority
++ * level. This affects only the decision whether or not to bring
++ * mapped pages onto the inactive list.
++ */
++ if (priority < 0)
++ priority = 0;
+ for (i = 0; zones[i] != 0; i++) {
+ struct zone *zone = zones[i];
+
+ if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+ continue;
+
+- zone->prev_priority = zone->temp_priority;
++ zone->prev_priority = priority;
+ }
+ return ret;
+ }
+@@ -1068,6 +1088,11 @@ static unsigned long balance_pgdat(pg_da
+ .swap_cluster_max = SWAP_CLUSTER_MAX,
+ .swappiness = vm_swappiness,
+ };
++ /*
++ * temp_priority is used to remember the scanning priority at which
++ * this zone was successfully refilled to free_pages == pages_high.
++ */
++ int temp_priority[MAX_NR_ZONES];
+
+ loop_again:
+ total_scanned = 0;
+@@ -1075,11 +1100,8 @@ loop_again:
+ sc.may_writepage = !laptop_mode;
+ count_vm_event(PAGEOUTRUN);
+
+- for (i = 0; i < pgdat->nr_zones; i++) {
+- struct zone *zone = pgdat->node_zones + i;
+-
+- zone->temp_priority = DEF_PRIORITY;
+- }
++ for (i = 0; i < pgdat->nr_zones; i++)
++ temp_priority[i] = DEF_PRIORITY;
+
+ for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+ int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
+@@ -1140,10 +1162,9 @@ scan:
+ if (!zone_watermark_ok(zone, order, zone->pages_high,
+ end_zone, 0))
+ all_zones_ok = 0;
+- zone->temp_priority = priority;
+- if (zone->prev_priority > priority)
+- zone->prev_priority = priority;
++ temp_priority[i] = priority;
+ sc.nr_scanned = 0;
++ note_zone_scanning_priority(zone, priority);
+ nr_reclaimed += shrink_zone(priority, zone, &sc);
+ reclaim_state->reclaimed_slab = 0;
+ nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
+@@ -1183,10 +1204,15 @@ scan:
+ break;
+ }
+ out:
++ /*
++ * Note within each zone the priority level at which this zone was
++ * brought into a happy state. So that the next thread which scans this
++ * zone will start out at that priority level.
++ */
+ for (i = 0; i < pgdat->nr_zones; i++) {
+ struct zone *zone = pgdat->node_zones + i;
+
+- zone->prev_priority = zone->temp_priority;
++ zone->prev_priority = temp_priority[i];
+ }
+ if (!all_zones_ok) {
+ cond_resched();
+@@ -1570,6 +1596,7 @@ static int __zone_reclaim(struct zone *z
+ */
+ priority = ZONE_RECLAIM_PRIORITY;
+ do {
++ note_zone_scanning_priority(zone, priority);
+ nr_reclaimed += shrink_zone(priority, zone, &sc);
+ priority--;
+ } while (priority >= 0 && nr_reclaimed < nr_pages);
+--- linux-2.6.18.1.orig/mm/vmstat.c
++++ linux-2.6.18.1/mm/vmstat.c
+@@ -586,11 +586,9 @@ static int zoneinfo_show(struct seq_file
+ seq_printf(m,
+ "\n all_unreclaimable: %u"
+ "\n prev_priority: %i"
+- "\n temp_priority: %i"
+ "\n start_pfn: %lu",
+ zone->all_unreclaimable,
+ zone->prev_priority,
+- zone->temp_priority,
+ zone->zone_start_pfn);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ seq_putc(m, '\n');