--- /dev/null
+From 74a03b69d1b5ce00a568e142ca97e76b7f5239c6 Mon Sep 17 00:00:00 2001
+From: john stultz <johnstul@us.ibm.com>
+Date: Fri, 1 May 2009 13:10:25 -0700
+Subject: clockevents: prevent endless loop in tick_handle_periodic()
+
+From: john stultz <johnstul@us.ibm.com>
+
+commit 74a03b69d1b5ce00a568e142ca97e76b7f5239c6 upstream.
+
+tick_handle_periodic() can lock up hard when a oneshot clock event
+device is used in combination with the jiffies clocksource.
+
+Avoid the endless loop by requiring that a clocksource valid for highres
+timekeeping be installed before tick_periodic() is called in a loop when
+using ONESHOT mode. The result is that jiffies is only incremented once
+per interrupt until a continuous hardware clocksource is available.
+
+Without this, we can run into an endless loop: on each cycle through the
+loop, jiffies is updated, which advances time by tick_period or more
+(due to clock steering). That can make the event programming see the
+next event as already before the newly incremented time and fail,
+causing tick_periodic() to be called again, so the whole process loops
+forever.
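+
+As a rough, purely illustrative sketch (not part of the upstream change),
+the failure path is the pre-patch loop in tick_handle_periodic(),
+annotated here to show the feedback with a jiffies-only clocksource:
+
+	for (;;) {
+		/* fails whenever 'next' is already in the past */
+		if (!clockevents_program_event(dev, next, ktime_get()))
+			return;
+		/*
+		 * With only the jiffies clocksource, this is what advances
+		 * the time ktime_get() reports, by tick_period or more
+		 * (clock steering)...
+		 */
+		tick_periodic(cpu);
+		/*
+		 * ...so 'next' can still be in the past on the retry and
+		 * the loop never terminates.
+		 */
+		next = ktime_add(next, tick_period);
+	}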
+
+[ Impact: prevent hard lock up ]
+
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/kernel/time/tick-common.c
++++ b/kernel/time/tick-common.c
+@@ -93,7 +93,17 @@ void tick_handle_periodic(struct clock_event_device *dev)
+ for (;;) {
+ if (!clockevents_program_event(dev, next, ktime_get()))
+ return;
+- tick_periodic(cpu);
++ /*
++ * Have to be careful here. If we're in oneshot mode,
++ * before we call tick_periodic() in a loop, we need
++ * to be sure we're using a real hardware clocksource.
++ * Otherwise we could get trapped in an infinite
++ * loop, as the tick_periodic() increments jiffies,
++ * which then will increment time, possibly causing
++ * the loop to trigger again and again.
++ */
++ if (timekeeping_valid_for_hres())
++ tick_periodic(cpu);
+ next = ktime_add(next, tick_period);
+ }
+ }
--- /dev/null
+From foo@baz Tue May 5 23:28:23 PDT 2009
+Date: Tue Apr 28 22:48:11 2009 +0200
+To: Greg KH <greg@kroah.com>
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Subject: mm: fix Committed_AS underflow on large NR_CPUS environment
+
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+
+commit 00a62ce91e554198ef28234c91c36f850f5a3bc9 upstream
+
+The Committed_AS field can underflow in certain situations:
+
+> # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c
+> 1 Committed_AS: 18446744073709323392 kB
+> 11 Committed_AS: 18446744073709455488 kB
+> 6 Committed_AS: 35136 kB
+> 5 Committed_AS: 18446744073709454400 kB
+> 7 Committed_AS: 35904 kB
+> 3 Committed_AS: 18446744073709453248 kB
+> 2 Committed_AS: 34752 kB
+> 9 Committed_AS: 18446744073709453248 kB
+> 8 Committed_AS: 34752 kB
+> 3 Committed_AS: 18446744073709320960 kB
+> 7 Committed_AS: 18446744073709454080 kB
+> 3 Committed_AS: 18446744073709320960 kB
+> 5 Committed_AS: 18446744073709454080 kB
+> 6 Committed_AS: 18446744073709320960 kB
+
+This happens because NR_CPUS can be greater than 1000 and
+meminfo_proc_show() does not check for underflow.
+
+But a calculation proportional to NR_CPUS isn't a good one anyway. In
+general, the likelihood of lock contention is proportional to the number
+of online cpus, not to the theoretical maximum number of cpus (NR_CPUS).
+
+The current kernel has a generic percpu-counter facility; using it is
+the right way. It simplifies the code, and
+percpu_counter_read_positive() does not have the underflow issue.
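+
+For reference, a minimal sketch of the percpu_counter pattern this patch
+switches to (it simply mirrors the calls added below; the comments are
+editorial, not upstream code):
+
+	struct percpu_counter vm_committed_as;
+
+	/* at boot, in mmap_init(); non-zero only on allocation failure */
+	percpu_counter_init(&vm_committed_as, 0);
+
+	/* batches updates per cpu and folds them into the global count */
+	percpu_counter_add(&vm_committed_as, pages);
+
+	/* reads the approximate global count and never returns a negative
+	 * value, so Committed_AS can no longer underflow */
+	committed = percpu_counter_read_positive(&vm_committed_as);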
+
+Reported-by: Dave Hansen <dave@linux.vnet.ibm.com>
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Eric B Munson <ebmunson@us.ibm.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Christoph Lameter <cl@linux-foundation.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/proc/meminfo.c | 2 +-
+ include/linux/mman.h | 9 +++------
+ mm/mmap.c | 12 ++++++------
+ mm/nommu.c | 13 +++++++------
+ mm/swap.c | 46 ----------------------------------------------
+ 5 files changed, 17 insertions(+), 65 deletions(-)
+
+--- a/fs/proc/meminfo.c
++++ b/fs/proc/meminfo.c
+@@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_
+ #define K(x) ((x) << (PAGE_SHIFT - 10))
+ si_meminfo(&i);
+ si_swapinfo(&i);
+- committed = atomic_long_read(&vm_committed_space);
++ committed = percpu_counter_read_positive(&vm_committed_as);
+ allowed = ((totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100) + total_swap_pages;
+
+--- a/include/linux/mman.h
++++ b/include/linux/mman.h
+@@ -12,21 +12,18 @@
+
+ #ifdef __KERNEL__
+ #include <linux/mm.h>
++#include <linux/percpu_counter.h>
+
+ #include <asm/atomic.h>
+
+ extern int sysctl_overcommit_memory;
+ extern int sysctl_overcommit_ratio;
+-extern atomic_long_t vm_committed_space;
++extern struct percpu_counter vm_committed_as;
+
+-#ifdef CONFIG_SMP
+-extern void vm_acct_memory(long pages);
+-#else
+ static inline void vm_acct_memory(long pages)
+ {
+- atomic_long_add(pages, &vm_committed_space);
++ percpu_counter_add(&vm_committed_as, pages);
+ }
+-#endif
+
+ static inline void vm_unacct_memory(long pages)
+ {
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -84,7 +84,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
+ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
+ int sysctl_overcommit_ratio = 50; /* default is 50% */
+ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
+-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
++struct percpu_counter vm_committed_as;
+
+ /*
+ * Check that a process has enough memory to allocate a new virtual
+@@ -178,11 +178,7 @@ int __vm_enough_memory(struct mm_struct
+ if (mm)
+ allowed -= mm->total_vm / 32;
+
+- /*
+- * cast `allowed' as a signed long because vm_committed_space
+- * sometimes has a negative value
+- */
+- if (atomic_long_read(&vm_committed_space) < (long)allowed)
++ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
+ return 0;
+ error:
+ vm_unacct_memory(pages);
+@@ -2477,6 +2473,10 @@ void mm_drop_all_locks(struct mm_struct
+ */
+ void __init mmap_init(void)
+ {
++ int ret;
++
++ ret = percpu_counter_init(&vm_committed_as, 0);
++ VM_BUG_ON(ret);
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), 0,
+ SLAB_PANIC, NULL);
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -62,7 +62,7 @@ void *high_memory;
+ struct page *mem_map;
+ unsigned long max_mapnr;
+ unsigned long num_physpages;
+-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
++struct percpu_counter vm_committed_as;
+ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
+ int sysctl_overcommit_ratio = 50; /* default is 50% */
+ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
+@@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
+ */
+ void __init mmap_init(void)
+ {
++ int ret;
++
++ ret = percpu_counter_init(&vm_committed_as, 0);
++ VM_BUG_ON(ret);
+ vm_region_jar = kmem_cache_create("vm_region_jar",
+ sizeof(struct vm_region), 0,
+ SLAB_PANIC, NULL);
+@@ -1849,12 +1853,9 @@ int __vm_enough_memory(struct mm_struct
+ if (mm)
+ allowed -= mm->total_vm / 32;
+
+- /*
+- * cast `allowed' as a signed long because vm_committed_space
+- * sometimes has a negative value
+- */
+- if (atomic_long_read(&vm_committed_space) < (long)allowed)
++ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
+ return 0;
++
+ error:
+ vm_unacct_memory(pages);
+
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -514,49 +514,6 @@ unsigned pagevec_lookup_tag(struct pagev
+
+ EXPORT_SYMBOL(pagevec_lookup_tag);
+
+-#ifdef CONFIG_SMP
+-/*
+- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
+- * CPUs
+- */
+-#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
+-
+-static DEFINE_PER_CPU(long, committed_space);
+-
+-void vm_acct_memory(long pages)
+-{
+- long *local;
+-
+- preempt_disable();
+- local = &__get_cpu_var(committed_space);
+- *local += pages;
+- if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
+- atomic_long_add(*local, &vm_committed_space);
+- *local = 0;
+- }
+- preempt_enable();
+-}
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+-
+-/* Drop the CPU's cached committed space back into the central pool. */
+-static int cpu_swap_callback(struct notifier_block *nfb,
+- unsigned long action,
+- void *hcpu)
+-{
+- long *committed;
+-
+- committed = &per_cpu(committed_space, (long)hcpu);
+- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+- atomic_long_add(*committed, &vm_committed_space);
+- *committed = 0;
+- drain_cpu_pagevecs((long)hcpu);
+- }
+- return NOTIFY_OK;
+-}
+-#endif /* CONFIG_HOTPLUG_CPU */
+-#endif /* CONFIG_SMP */
+-
+ /*
+ * Perform any setup for the swap system
+ */
+@@ -577,7 +534,4 @@ void __init swap_setup(void)
+ * Right now other parts of the system means that we
+ * _really_ don't want to cluster much more
+ */
+-#ifdef CONFIG_HOTPLUG_CPU
+- hotcpu_notifier(cpu_swap_callback, 0);
+-#endif
+ }
--- /dev/null
+From a425a638c858fd10370b573bde81df3ba500e271 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mel@csn.ul.ie>
+Date: Tue, 5 May 2009 16:37:17 +0100
+Subject: Ignore madvise(MADV_WILLNEED) for hugetlbfs-backed regions
+
+From: Mel Gorman <mel@csn.ul.ie>
+
+commit a425a638c858fd10370b573bde81df3ba500e271 upstream.
+
+madvise(MADV_WILLNEED) forces page cache readahead on a range of memory
+backed by a file. The assumption is made that the page required is
+order-0 and "normal" page cache.
+
+On hugetlbfs, this assumption is not true and order-0 pages are
+allocated and inserted into the hugetlbfs page cache. This leaks
+hugetlbfs page reservations and can cause BUGs to trigger related to
+corrupted page tables.
+
+This patch causes MADV_WILLNEED to be ignored for hugetlbfs-backed
+regions.
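+
+Purely as an illustration of the call pattern that used to leak
+reservations (assumed hugetlbfs mount point and 2MB huge page size, not
+taken from the upstream commit):
+
+	#include <fcntl.h>
+	#include <sys/mman.h>
+
+	size_t len = 2 * 1024 * 1024;   /* assumed huge page size */
+	int fd = open("/mnt/huge/file", O_CREAT | O_RDWR, 0600);
+	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	/* with this patch the advice is silently ignored (returns 0) for
+	 * VM_HUGETLB vmas instead of doing order-0 readahead */
+	madvise(p, len, MADV_WILLNEED);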
+
+Signed-off-by: Mel Gorman <mel@csn.ul.ie>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/madvise.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -112,6 +112,14 @@ static long madvise_willneed(struct vm_a
+ if (!file)
+ return -EBADF;
+
++ /*
++ * Page cache readahead assumes page cache pages are order-0 which
++ * is not the case for hugetlbfs. Do not give a bad return value
++ * but ignore the advice.
++ */
++ if (vma->vm_flags & VM_HUGETLB)
++ return 0;
++
+ if (file->f_mapping->a_ops->get_xip_mem) {
+ /* no bad return value, but ignore advice */
+ return 0;