--- /dev/null
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Subject: mm: fix Committed_AS underflow on large NR_CPUS environment
+Patch-mainline: 2.6.30-rc5
+Git-commit: 00a62ce91e554198ef28234c91c36f850f5a3bc9
+References: bnc#505831
+
+mm: fix Committed_AS underflow on large NR_CPUS environment
+
+The Committed_AS field can underflow in certain situations:
+
+> # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c
+> 1 Committed_AS: 18446744073709323392 kB
+> 11 Committed_AS: 18446744073709455488 kB
+> 6 Committed_AS: 35136 kB
+> 5 Committed_AS: 18446744073709454400 kB
+> 7 Committed_AS: 35904 kB
+> 3 Committed_AS: 18446744073709453248 kB
+> 2 Committed_AS: 34752 kB
+> 9 Committed_AS: 18446744073709453248 kB
+> 8 Committed_AS: 34752 kB
+> 3 Committed_AS: 18446744073709320960 kB
+> 7 Committed_AS: 18446744073709454080 kB
+> 3 Committed_AS: 18446744073709320960 kB
+> 5 Committed_AS: 18446744073709454080 kB
+> 6 Committed_AS: 18446744073709320960 kB
+
+Because NR_CPUS can be greater than 1000 and meminfo_proc_show() does
+not check for underflow.
+
+But a calculation proportional to NR_CPUS isn't a good one. In general,
+the possibility of lock contention is proportional to the number of
+online cpus, not the theoretical maximum number of cpus (NR_CPUS).
+
+The current kernel has generic percpu-counter infrastructure; using it
+is the right way. It simplifies the code, and
+percpu_counter_read_positive() avoids the underflow issue.
+
+Reported-by: Dave Hansen <dave@linux.vnet.ibm.com>
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Eric B Munson <ebmunson@us.ibm.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Christoph Lameter <cl@linux-foundation.org>
+Cc: <stable@kernel.org> [All kernel versions]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Backported-by: Jeff Mahoney <jeffm@suse.com>
+Signed-off-by: Jeff Mahoney <jeffm@suse.com>
+---
+ fs/proc/proc_misc.c | 2 +-
+ include/linux/mman.h | 9 +++------
+ kernel/fork.c | 2 ++
+ mm/mmap.c | 8 ++------
+ mm/nommu.c | 9 +++------
+ mm/swap.c | 46 ----------------------------------------------
+ 6 files changed, 11 insertions(+), 65 deletions(-)
+
+--- a/fs/proc/proc_misc.c
++++ b/fs/proc/proc_misc.c
+@@ -145,7 +145,7 @@ static int meminfo_read_proc(char *page,
+ #define K(x) ((x) << (PAGE_SHIFT - 10))
+ si_meminfo(&i);
+ si_swapinfo(&i);
+- committed = atomic_long_read(&vm_committed_space);
++ committed = percpu_counter_read_positive(&vm_committed_as);
+ allowed = ((totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100) + total_swap_pages;
+
+--- a/include/linux/mman.h
++++ b/include/linux/mman.h
+@@ -12,21 +12,18 @@
+
+ #ifdef __KERNEL__
+ #include <linux/mm.h>
++#include <linux/percpu_counter.h>
+
+ #include <asm/atomic.h>
+
+ extern int sysctl_overcommit_memory;
+ extern int sysctl_overcommit_ratio;
+-extern atomic_long_t vm_committed_space;
++extern struct percpu_counter vm_committed_as;
+
+-#ifdef CONFIG_SMP
+-extern void vm_acct_memory(long pages);
+-#else
+ static inline void vm_acct_memory(long pages)
+ {
+- atomic_long_add(pages, &vm_committed_space);
++ percpu_counter_add(&vm_committed_as, pages);
+ }
+-#endif
+
+ static inline void vm_unacct_memory(long pages)
+ {
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1442,6 +1442,8 @@ void __init proc_caches_init(void)
+ mm_cachep = kmem_cache_create("mm_struct",
+ sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
++ if (percpu_counter_init(&vm_committed_as, 0))
++ panic("Failed to allocate vm_committed_as");
+ }
+
+ /*
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -84,7 +84,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
+ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
+ int sysctl_overcommit_ratio = 50; /* default is 50% */
+ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
+-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
++struct percpu_counter vm_committed_as;
+ int heap_stack_gap __read_mostly = 1;
+
+ /*
+@@ -178,11 +178,7 @@ int __vm_enough_memory(struct mm_struct
+ leave 3% of the size of this process for other processes */
+ allowed -= mm->total_vm / 32;
+
+- /*
+- * cast `allowed' as a signed long because vm_committed_space
+- * sometimes has a negative value
+- */
+- if (atomic_long_read(&vm_committed_space) < (long)allowed)
++ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
+ return 0;
+ error:
+ vm_unacct_memory(pages);
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -39,7 +39,7 @@ struct page *mem_map;
+ unsigned long max_mapnr;
+ unsigned long num_physpages;
+ unsigned long askedalloc, realalloc;
+-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
++struct percpu_counter vm_committed_as;
+ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
+ int sysctl_overcommit_ratio = 50; /* default is 50% */
+ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
+@@ -1434,12 +1434,9 @@ int __vm_enough_memory(struct mm_struct
+ leave 3% of the size of this process for other processes */
+ allowed -= current->mm->total_vm / 32;
+
+- /*
+- * cast `allowed' as a signed long because vm_committed_space
+- * sometimes has a negative value
+- */
+- if (atomic_long_read(&vm_committed_space) < (long)allowed)
++ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
+ return 0;
++
+ error:
+ vm_unacct_memory(pages);
+
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -474,49 +474,6 @@ unsigned pagevec_lookup_tag(struct pagev
+
+ EXPORT_SYMBOL(pagevec_lookup_tag);
+
+-#ifdef CONFIG_SMP
+-/*
+- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
+- * CPUs
+- */
+-#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
+-
+-static DEFINE_PER_CPU(long, committed_space);
+-
+-void vm_acct_memory(long pages)
+-{
+- long *local;
+-
+- preempt_disable();
+- local = &__get_cpu_var(committed_space);
+- *local += pages;
+- if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
+- atomic_long_add(*local, &vm_committed_space);
+- *local = 0;
+- }
+- preempt_enable();
+-}
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+-
+-/* Drop the CPU's cached committed space back into the central pool. */
+-static int cpu_swap_callback(struct notifier_block *nfb,
+- unsigned long action,
+- void *hcpu)
+-{
+- long *committed;
+-
+- committed = &per_cpu(committed_space, (long)hcpu);
+- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+- atomic_long_add(*committed, &vm_committed_space);
+- *committed = 0;
+- drain_cpu_pagevecs((long)hcpu);
+- }
+- return NOTIFY_OK;
+-}
+-#endif /* CONFIG_HOTPLUG_CPU */
+-#endif /* CONFIG_SMP */
+-
+ /*
+ * Perform any setup for the swap system
+ */
+@@ -537,7 +494,4 @@ void __init swap_setup(void)
+ * Right now other parts of the system means that we
+ * _really_ don't want to cluster much more
+ */
+-#ifdef CONFIG_HOTPLUG_CPU
+- hotcpu_notifier(cpu_swap_callback, 0);
+-#endif
+ }