+++ /dev/null
-From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
-Subject: mm: fix Committed_AS underflow on large NR_CPUS environment
-Patch-mainline: 2.6.30-rc5
-Git-commit: 00a62ce91e554198ef28234c91c36f850f5a3bc9
-References: bnc#505831
-
-mm: fix Committed_AS underflow on large NR_CPUS environment
-
-The Committed_AS field can underflow in certain situations:
-
-> # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c
-> 1 Committed_AS: 18446744073709323392 kB
-> 11 Committed_AS: 18446744073709455488 kB
-> 6 Committed_AS: 35136 kB
-> 5 Committed_AS: 18446744073709454400 kB
-> 7 Committed_AS: 35904 kB
-> 3 Committed_AS: 18446744073709453248 kB
-> 2 Committed_AS: 34752 kB
-> 9 Committed_AS: 18446744073709453248 kB
-> 8 Committed_AS: 34752 kB
-> 3 Committed_AS: 18446744073709320960 kB
-> 7 Committed_AS: 18446744073709454080 kB
-> 3 Committed_AS: 18446744073709320960 kB
-> 5 Committed_AS: 18446744073709454080 kB
-> 6 Committed_AS: 18446744073709320960 kB
-
-This happens because the per-cpu flush threshold scales with NR_CPUS,
-which can be greater than 1000, and meminfo_proc_show() does not
-check for underflow when it reads the global counter.
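-
-For example, with NR_CPUS=4096 the old flush threshold is
-ACCT_THRESHOLD = max(16, NR_CPUS * 2) = 8192 pages per cpu, so the
-unflushed per-cpu deltas can together dwarf the global counter; once
-the global atomic dips below zero, /proc/meminfo prints it as an
-unsigned value just below 2^64 kB, as seen above.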
-
-A threshold proportional to NR_CPUS is the wrong calculation anyway.
-In general, the likelihood of lock contention is proportional to the
-number of online cpus, not the theoretical maximum (NR_CPUS).
-
-The kernel already provides generic percpu_counter infrastructure;
-using it is the right fix. It simplifies the code, and readers go
-through percpu_counter_read_positive(), which cannot report an
-underflowed (negative) value.
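-
-The key property is that a transiently negative sum is never
-reported. A minimal sketch of the read side's semantics (not a
-verbatim copy of the kernel header, which may clamp to a different
-small non-negative value):
-
-	static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
-	{
-		s64 ret = fbc->count;	/* approximate: unflushed per-cpu deltas excluded */
-
-		barrier();	/* prevent the compiler from reloading fbc->count */
-		return ret >= 0 ? ret : 0;	/* clamp transient negatives */
-	}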
-
-Reported-by: Dave Hansen <dave@linux.vnet.ibm.com>
-Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
-Cc: Eric B Munson <ebmunson@us.ibm.com>
-Cc: Mel Gorman <mel@csn.ul.ie>
-Cc: Christoph Lameter <cl@linux-foundation.org>
-Cc: <stable@kernel.org> [All kernel versions]
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-Backported-by: Jeff Mahoney <jeffm@suse.com>
-Signed-off-by: Jeff Mahoney <jeffm@suse.com>
----
- fs/proc/proc_misc.c | 2 +-
- include/linux/mman.h | 9 +++------
- kernel/fork.c | 2 ++
- mm/mmap.c | 8 ++------
- mm/nommu.c | 9 +++------
- mm/swap.c | 46 ----------------------------------------------
- 6 files changed, 11 insertions(+), 65 deletions(-)
-
---- a/fs/proc/proc_misc.c
-+++ b/fs/proc/proc_misc.c
-@@ -145,7 +145,7 @@ static int meminfo_read_proc(char *page,
- #define K(x) ((x) << (PAGE_SHIFT - 10))
- si_meminfo(&i);
- si_swapinfo(&i);
-- committed = atomic_long_read(&vm_committed_space);
-+ committed = percpu_counter_read_positive(&vm_committed_as);
- allowed = ((totalram_pages - hugetlb_total_pages())
- * sysctl_overcommit_ratio / 100) + total_swap_pages;
-
---- a/include/linux/mman.h
-+++ b/include/linux/mman.h
-@@ -12,21 +12,18 @@
-
- #ifdef __KERNEL__
- #include <linux/mm.h>
-+#include <linux/percpu_counter.h>
-
- #include <asm/atomic.h>
-
- extern int sysctl_overcommit_memory;
- extern int sysctl_overcommit_ratio;
--extern atomic_long_t vm_committed_space;
-+extern struct percpu_counter vm_committed_as;
-
--#ifdef CONFIG_SMP
--extern void vm_acct_memory(long pages);
--#else
- static inline void vm_acct_memory(long pages)
- {
-- atomic_long_add(pages, &vm_committed_space);
-+ percpu_counter_add(&vm_committed_as, pages);
- }
--#endif
-
- static inline void vm_unacct_memory(long pages)
- {
---- a/kernel/fork.c
-+++ b/kernel/fork.c
-@@ -1442,6 +1442,8 @@ void __init proc_caches_init(void)
- mm_cachep = kmem_cache_create("mm_struct",
- sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-+ if (percpu_counter_init(&vm_committed_as, 0))
-+ panic("Failed to allocate vm_committed_as");
- }
-
- /*
---- a/mm/mmap.c
-+++ b/mm/mmap.c
-@@ -84,7 +84,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
- int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
- int sysctl_overcommit_ratio = 50; /* default is 50% */
- int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
--atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
-+struct percpu_counter vm_committed_as;
- int heap_stack_gap __read_mostly = 1;
-
- /*
-@@ -178,11 +178,7 @@ int __vm_enough_memory(struct mm_struct
- leave 3% of the size of this process for other processes */
- allowed -= mm->total_vm / 32;
-
-- /*
-- * cast `allowed' as a signed long because vm_committed_space
-- * sometimes has a negative value
-- */
-- if (atomic_long_read(&vm_committed_space) < (long)allowed)
-+ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
- return 0;
- error:
- vm_unacct_memory(pages);
---- a/mm/nommu.c
-+++ b/mm/nommu.c
-@@ -39,7 +39,7 @@ struct page *mem_map;
- unsigned long max_mapnr;
- unsigned long num_physpages;
- unsigned long askedalloc, realalloc;
--atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
-+struct percpu_counter vm_committed_as;
- int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
- int sysctl_overcommit_ratio = 50; /* default is 50% */
- int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
-@@ -1434,12 +1434,9 @@ int __vm_enough_memory(struct mm_struct
- leave 3% of the size of this process for other processes */
- allowed -= current->mm->total_vm / 32;
-
-- /*
-- * cast `allowed' as a signed long because vm_committed_space
-- * sometimes has a negative value
-- */
-- if (atomic_long_read(&vm_committed_space) < (long)allowed)
-+ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
- return 0;
-+
- error:
- vm_unacct_memory(pages);
-
---- a/mm/swap.c
-+++ b/mm/swap.c
-@@ -474,49 +474,6 @@ unsigned pagevec_lookup_tag(struct pagev
-
- EXPORT_SYMBOL(pagevec_lookup_tag);
-
--#ifdef CONFIG_SMP
--/*
-- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
-- * CPUs
-- */
--#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
--
--static DEFINE_PER_CPU(long, committed_space);
--
--void vm_acct_memory(long pages)
--{
-- long *local;
--
-- preempt_disable();
-- local = &__get_cpu_var(committed_space);
-- *local += pages;
-- if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
-- atomic_long_add(*local, &vm_committed_space);
-- *local = 0;
-- }
-- preempt_enable();
--}
--
--#ifdef CONFIG_HOTPLUG_CPU
--
--/* Drop the CPU's cached committed space back into the central pool. */
--static int cpu_swap_callback(struct notifier_block *nfb,
-- unsigned long action,
-- void *hcpu)
--{
-- long *committed;
--
-- committed = &per_cpu(committed_space, (long)hcpu);
-- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-- atomic_long_add(*committed, &vm_committed_space);
-- *committed = 0;
-- drain_cpu_pagevecs((long)hcpu);
-- }
-- return NOTIFY_OK;
--}
--#endif /* CONFIG_HOTPLUG_CPU */
--#endif /* CONFIG_SMP */
--
- /*
- * Perform any setup for the swap system
- */
-@@ -537,7 +494,4 @@ void __init swap_setup(void)
- * Right now other parts of the system means that we
- * _really_ don't want to cluster much more
- */
--#ifdef CONFIG_HOTPLUG_CPU
-- hotcpu_notifier(cpu_swap_callback, 0);
--#endif
- }