]> git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blame - src/patches/suse-2.6.27.25/patches.fixes/mm-fix-Commited_AS-underflow-on-large-NR_CPUS
Changed checkfs to auto reboot after correctable fsck fixes.
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.fixes / mm-fix-Commited_AS-underflow-on-large-NR_CPUS
CommitLineData
00e5a55c
BS
1From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
2Subject: mm: fix Committed_AS underflow on large NR_CPUS environment
3Patch-mainline: 2.6.30-rc5
4Git-commit: 00a62ce91e554198ef28234c91c36f850f5a3bc9
5References: bnc#505831
6
7mm: fix Committed_AS underflow on large NR_CPUS environment
8
9The Committed_AS field can underflow in certain situations:
10
11> # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c
12> 1 Committed_AS: 18446744073709323392 kB
13> 11 Committed_AS: 18446744073709455488 kB
14> 6 Committed_AS: 35136 kB
15> 5 Committed_AS: 18446744073709454400 kB
16> 7 Committed_AS: 35904 kB
17> 3 Committed_AS: 18446744073709453248 kB
18> 2 Committed_AS: 34752 kB
19> 9 Committed_AS: 18446744073709453248 kB
20> 8 Committed_AS: 34752 kB
21> 3 Committed_AS: 18446744073709320960 kB
22> 7 Committed_AS: 18446744073709454080 kB
23> 3 Committed_AS: 18446744073709320960 kB
24> 5 Committed_AS: 18446744073709454080 kB
25> 6 Committed_AS: 18446744073709320960 kB
26
27Because NR_CPUS can be greater than 1000 and meminfo_proc_show() does
28not check for underflow.
29
30But scaling by NR_CPUS is not a good calculation. In general, the
31possibility of lock contention is proportional to the number of online
32cpus, not the theoretical maximum number of cpus (NR_CPUS).
33
34The current kernel has generic percpu-counter infrastructure; using it is
35the right way. It simplifies the code, and percpu_counter_read_positive()
36does not suffer from the underflow issue.
37
38Reported-by: Dave Hansen <dave@linux.vnet.ibm.com>
39Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
40Cc: Eric B Munson <ebmunson@us.ibm.com>
41Cc: Mel Gorman <mel@csn.ul.ie>
42Cc: Christoph Lameter <cl@linux-foundation.org>
43Cc: <stable@kernel.org> [All kernel versions]
44Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
45Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
46Backported-by: Jeff Mahoney <jeffm@suse.com>
47Signed-off-by: Jeff Mahoney <jeffm@suse.com>
48---
49 fs/proc/proc_misc.c | 2 +-
50 include/linux/mman.h | 9 +++------
51 kernel/fork.c | 2 ++
52 mm/mmap.c | 8 ++------
53 mm/nommu.c | 9 +++------
54 mm/swap.c | 46 ----------------------------------------------
55 6 files changed, 11 insertions(+), 65 deletions(-)
56
57--- a/fs/proc/proc_misc.c
58+++ b/fs/proc/proc_misc.c
59@@ -145,7 +145,7 @@ static int meminfo_read_proc(char *page,
60 #define K(x) ((x) << (PAGE_SHIFT - 10))
61 si_meminfo(&i);
62 si_swapinfo(&i);
63- committed = atomic_long_read(&vm_committed_space);
64+ committed = percpu_counter_read_positive(&vm_committed_as);
65 allowed = ((totalram_pages - hugetlb_total_pages())
66 * sysctl_overcommit_ratio / 100) + total_swap_pages;
67
68--- a/include/linux/mman.h
69+++ b/include/linux/mman.h
70@@ -12,21 +12,18 @@
71
72 #ifdef __KERNEL__
73 #include <linux/mm.h>
74+#include <linux/percpu_counter.h>
75
76 #include <asm/atomic.h>
77
78 extern int sysctl_overcommit_memory;
79 extern int sysctl_overcommit_ratio;
80-extern atomic_long_t vm_committed_space;
81+extern struct percpu_counter vm_committed_as;
82
83-#ifdef CONFIG_SMP
84-extern void vm_acct_memory(long pages);
85-#else
86 static inline void vm_acct_memory(long pages)
87 {
88- atomic_long_add(pages, &vm_committed_space);
89+ percpu_counter_add(&vm_committed_as, pages);
90 }
91-#endif
92
93 static inline void vm_unacct_memory(long pages)
94 {
95--- a/kernel/fork.c
96+++ b/kernel/fork.c
97@@ -1442,6 +1442,8 @@ void __init proc_caches_init(void)
98 mm_cachep = kmem_cache_create("mm_struct",
99 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
100 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
101+ if (percpu_counter_init(&vm_committed_as, 0))
102+ panic("Failed to allocate vm_committed_as");
103 }
104
105 /*
106--- a/mm/mmap.c
107+++ b/mm/mmap.c
108@@ -84,7 +84,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
109 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
110 int sysctl_overcommit_ratio = 50; /* default is 50% */
111 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
112-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
113+struct percpu_counter vm_committed_as;
114 int heap_stack_gap __read_mostly = 1;
115
116 /*
117@@ -178,11 +178,7 @@ int __vm_enough_memory(struct mm_struct
118 leave 3% of the size of this process for other processes */
119 allowed -= mm->total_vm / 32;
120
121- /*
122- * cast `allowed' as a signed long because vm_committed_space
123- * sometimes has a negative value
124- */
125- if (atomic_long_read(&vm_committed_space) < (long)allowed)
126+ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
127 return 0;
128 error:
129 vm_unacct_memory(pages);
130--- a/mm/nommu.c
131+++ b/mm/nommu.c
132@@ -39,7 +39,7 @@ struct page *mem_map;
133 unsigned long max_mapnr;
134 unsigned long num_physpages;
135 unsigned long askedalloc, realalloc;
136-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
137+struct percpu_counter vm_committed_as;
138 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
139 int sysctl_overcommit_ratio = 50; /* default is 50% */
140 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
141@@ -1434,12 +1434,9 @@ int __vm_enough_memory(struct mm_struct
142 leave 3% of the size of this process for other processes */
143 allowed -= current->mm->total_vm / 32;
144
145- /*
146- * cast `allowed' as a signed long because vm_committed_space
147- * sometimes has a negative value
148- */
149- if (atomic_long_read(&vm_committed_space) < (long)allowed)
150+ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
151 return 0;
152+
153 error:
154 vm_unacct_memory(pages);
155
156--- a/mm/swap.c
157+++ b/mm/swap.c
158@@ -474,49 +474,6 @@ unsigned pagevec_lookup_tag(struct pagev
159
160 EXPORT_SYMBOL(pagevec_lookup_tag);
161
162-#ifdef CONFIG_SMP
163-/*
164- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
165- * CPUs
166- */
167-#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
168-
169-static DEFINE_PER_CPU(long, committed_space);
170-
171-void vm_acct_memory(long pages)
172-{
173- long *local;
174-
175- preempt_disable();
176- local = &__get_cpu_var(committed_space);
177- *local += pages;
178- if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
179- atomic_long_add(*local, &vm_committed_space);
180- *local = 0;
181- }
182- preempt_enable();
183-}
184-
185-#ifdef CONFIG_HOTPLUG_CPU
186-
187-/* Drop the CPU's cached committed space back into the central pool. */
188-static int cpu_swap_callback(struct notifier_block *nfb,
189- unsigned long action,
190- void *hcpu)
191-{
192- long *committed;
193-
194- committed = &per_cpu(committed_space, (long)hcpu);
195- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
196- atomic_long_add(*committed, &vm_committed_space);
197- *committed = 0;
198- drain_cpu_pagevecs((long)hcpu);
199- }
200- return NOTIFY_OK;
201-}
202-#endif /* CONFIG_HOTPLUG_CPU */
203-#endif /* CONFIG_SMP */
204-
205 /*
206 * Perform any setup for the swap system
207 */
208@@ -537,7 +494,4 @@ void __init swap_setup(void)
209 * Right now other parts of the system means that we
210 * _really_ don't want to cluster much more
211 */
212-#ifdef CONFIG_HOTPLUG_CPU
213- hotcpu_notifier(cpu_swap_callback, 0);
214-#endif
215 }