--- /dev/null
+From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Subject: mm: fix Committed_AS underflow on large NR_CPUS environment
+Patch-mainline: 2.6.30-rc5
+Git-commit: 00a62ce91e554198ef28234c91c36f850f5a3bc9
+References: bnc#505831
+
+mm: fix Committed_AS underflow on large NR_CPUS environment
+
+The Committed_AS field can underflow in certain situations:
+
+> # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c
+> 1 Committed_AS: 18446744073709323392 kB
+> 11 Committed_AS: 18446744073709455488 kB
+> 6 Committed_AS: 35136 kB
+> 5 Committed_AS: 18446744073709454400 kB
+> 7 Committed_AS: 35904 kB
+> 3 Committed_AS: 18446744073709453248 kB
+> 2 Committed_AS: 34752 kB
+> 9 Committed_AS: 18446744073709453248 kB
+> 8 Committed_AS: 34752 kB
+> 3 Committed_AS: 18446744073709320960 kB
+> 7 Committed_AS: 18446744073709454080 kB
+> 3 Committed_AS: 18446744073709320960 kB
+> 5 Committed_AS: 18446744073709454080 kB
+> 6 Committed_AS: 18446744073709320960 kB
+
+Because NR_CPUS can be greater than 1000 and meminfo_proc_show() does
+not check for underflow.
+
+But a calculation proportional to NR_CPUS isn't a good one. In general,
+the possibility of lock contention is proportional to the number of
+online cpus, not the theoretical maximum number of cpus (NR_CPUS).
+
+The current kernel has generic percpu-counter infrastructure; using it
+is the right way. It simplifies the code, and
+percpu_counter_read_positive() avoids the underflow issue.
+
+Reported-by: Dave Hansen <dave@linux.vnet.ibm.com>
+Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Eric B Munson <ebmunson@us.ibm.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Christoph Lameter <cl@linux-foundation.org>
+Cc: <stable@kernel.org> [All kernel versions]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Backported-by: Jeff Mahoney <jeffm@suse.com>
+Signed-off-by: Jeff Mahoney <jeffm@suse.com>
+---
+ fs/proc/proc_misc.c | 2 +-
+ include/linux/mman.h | 9 +++------
+ kernel/fork.c | 2 ++
+ mm/mmap.c | 8 ++------
+ mm/nommu.c | 9 +++------
+ mm/swap.c | 46 ----------------------------------------------
+ 6 files changed, 11 insertions(+), 65 deletions(-)
+
+--- a/fs/proc/proc_misc.c
++++ b/fs/proc/proc_misc.c
+@@ -145,7 +145,7 @@ static int meminfo_read_proc(char *page,
+ #define K(x) ((x) << (PAGE_SHIFT - 10))
+ si_meminfo(&i);
+ si_swapinfo(&i);
+- committed = atomic_long_read(&vm_committed_space);
++ committed = percpu_counter_read_positive(&vm_committed_as);
+ allowed = ((totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100) + total_swap_pages;
+
+--- a/include/linux/mman.h
++++ b/include/linux/mman.h
+@@ -12,21 +12,18 @@
+
+ #ifdef __KERNEL__
+ #include <linux/mm.h>
++#include <linux/percpu_counter.h>
+
+ #include <asm/atomic.h>
+
+ extern int sysctl_overcommit_memory;
+ extern int sysctl_overcommit_ratio;
+-extern atomic_long_t vm_committed_space;
++extern struct percpu_counter vm_committed_as;
+
+-#ifdef CONFIG_SMP
+-extern void vm_acct_memory(long pages);
+-#else
+ static inline void vm_acct_memory(long pages)
+ {
+- atomic_long_add(pages, &vm_committed_space);
++ percpu_counter_add(&vm_committed_as, pages);
+ }
+-#endif
+
+ static inline void vm_unacct_memory(long pages)
+ {
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1442,6 +1442,8 @@ void __init proc_caches_init(void)
+ mm_cachep = kmem_cache_create("mm_struct",
+ sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
++ if (percpu_counter_init(&vm_committed_as, 0))
++ panic("Failed to allocate vm_committed_as");
+ }
+
+ /*
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -84,7 +84,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
+ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
+ int sysctl_overcommit_ratio = 50; /* default is 50% */
+ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
+-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
++struct percpu_counter vm_committed_as;
+ int heap_stack_gap __read_mostly = 1;
+
+ /*
+@@ -178,11 +178,7 @@ int __vm_enough_memory(struct mm_struct
+ leave 3% of the size of this process for other processes */
+ allowed -= mm->total_vm / 32;
+
+- /*
+- * cast `allowed' as a signed long because vm_committed_space
+- * sometimes has a negative value
+- */
+- if (atomic_long_read(&vm_committed_space) < (long)allowed)
++ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
+ return 0;
+ error:
+ vm_unacct_memory(pages);
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -39,7 +39,7 @@ struct page *mem_map;
+ unsigned long max_mapnr;
+ unsigned long num_physpages;
+ unsigned long askedalloc, realalloc;
+-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
++struct percpu_counter vm_committed_as;
+ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
+ int sysctl_overcommit_ratio = 50; /* default is 50% */
+ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
+@@ -1434,12 +1434,9 @@ int __vm_enough_memory(struct mm_struct
+ leave 3% of the size of this process for other processes */
+ allowed -= current->mm->total_vm / 32;
+
+- /*
+- * cast `allowed' as a signed long because vm_committed_space
+- * sometimes has a negative value
+- */
+- if (atomic_long_read(&vm_committed_space) < (long)allowed)
++ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
+ return 0;
++
+ error:
+ vm_unacct_memory(pages);
+
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -474,49 +474,6 @@ unsigned pagevec_lookup_tag(struct pagev
+
+ EXPORT_SYMBOL(pagevec_lookup_tag);
+
+-#ifdef CONFIG_SMP
+-/*
+- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
+- * CPUs
+- */
+-#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
+-
+-static DEFINE_PER_CPU(long, committed_space);
+-
+-void vm_acct_memory(long pages)
+-{
+- long *local;
+-
+- preempt_disable();
+- local = &__get_cpu_var(committed_space);
+- *local += pages;
+- if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
+- atomic_long_add(*local, &vm_committed_space);
+- *local = 0;
+- }
+- preempt_enable();
+-}
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+-
+-/* Drop the CPU's cached committed space back into the central pool. */
+-static int cpu_swap_callback(struct notifier_block *nfb,
+- unsigned long action,
+- void *hcpu)
+-{
+- long *committed;
+-
+- committed = &per_cpu(committed_space, (long)hcpu);
+- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+- atomic_long_add(*committed, &vm_committed_space);
+- *committed = 0;
+- drain_cpu_pagevecs((long)hcpu);
+- }
+- return NOTIFY_OK;
+-}
+-#endif /* CONFIG_HOTPLUG_CPU */
+-#endif /* CONFIG_SMP */
+-
+ /*
+ * Perform any setup for the swap system
+ */
+@@ -537,7 +494,4 @@ void __init swap_setup(void)
+ * Right now other parts of the system means that we
+ * _really_ don't want to cluster much more
+ */
+-#ifdef CONFIG_HOTPLUG_CPU
+- hotcpu_notifier(cpu_swap_callback, 0);
+-#endif
+ }