]> git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blob - src/patches/suse-2.6.27.25/patches.fixes/mm-fix-Commited_AS-underflow-on-large-NR_CPUS
Changed checkfs to auto reboot after correctable fsck fixes.
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.fixes / mm-fix-Commited_AS-underflow-on-large-NR_CPUS
1 From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
2 Subject: mm: fix Committed_AS underflow on large NR_CPUS environment
3 Patch-mainline: 2.6.30-rc5
4 Git-commit: 00a62ce91e554198ef28234c91c36f850f5a3bc9
5 References: bnc#505831
6
7 mm: fix Committed_AS underflow on large NR_CPUS environment
8
9 The Committed_AS field can underflow in certain situations:
10
11 > # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c
12 > 1 Committed_AS: 18446744073709323392 kB
13 > 11 Committed_AS: 18446744073709455488 kB
14 > 6 Committed_AS: 35136 kB
15 > 5 Committed_AS: 18446744073709454400 kB
16 > 7 Committed_AS: 35904 kB
17 > 3 Committed_AS: 18446744073709453248 kB
18 > 2 Committed_AS: 34752 kB
19 > 9 Committed_AS: 18446744073709453248 kB
20 > 8 Committed_AS: 34752 kB
21 > 3 Committed_AS: 18446744073709320960 kB
22 > 7 Committed_AS: 18446744073709454080 kB
23 > 3 Committed_AS: 18446744073709320960 kB
24 > 5 Committed_AS: 18446744073709454080 kB
25 > 6 Committed_AS: 18446744073709320960 kB
26
27 This happens because NR_CPUS can be greater than 1000 and
28 meminfo_proc_show() does not check for underflow.
29
30 But a calculation proportional to NR_CPUS isn't a good one. In
31 general, the likelihood of lock contention is proportional to the
32 number of online cpus, not the theoretical maximum cpus (NR_CPUS).
33
34 The current kernel has generic percpu-counter infrastructure; using it
35 is the right way. It simplifies the code, and
36 percpu_counter_read_positive() cannot produce an underflow.
37
38 Reported-by: Dave Hansen <dave@linux.vnet.ibm.com>
39 Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
40 Cc: Eric B Munson <ebmunson@us.ibm.com>
41 Cc: Mel Gorman <mel@csn.ul.ie>
42 Cc: Christoph Lameter <cl@linux-foundation.org>
43 Cc: <stable@kernel.org> [All kernel versions]
44 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
45 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
46 Backported-by: Jeff Mahoney <jeffm@suse.com>
47 Signed-off-by: Jeff Mahoney <jeffm@suse.com>
48 ---
49 fs/proc/proc_misc.c | 2 +-
50 include/linux/mman.h | 9 +++------
51 kernel/fork.c | 2 ++
52 mm/mmap.c | 8 ++------
53 mm/nommu.c | 9 +++------
54 mm/swap.c | 46 ----------------------------------------------
55 6 files changed, 11 insertions(+), 65 deletions(-)
56
57 --- a/fs/proc/proc_misc.c
58 +++ b/fs/proc/proc_misc.c
59 @@ -145,7 +145,7 @@ static int meminfo_read_proc(char *page,
60 #define K(x) ((x) << (PAGE_SHIFT - 10))
61 si_meminfo(&i);
62 si_swapinfo(&i);
63 - committed = atomic_long_read(&vm_committed_space);
64 + committed = percpu_counter_read_positive(&vm_committed_as);
65 allowed = ((totalram_pages - hugetlb_total_pages())
66 * sysctl_overcommit_ratio / 100) + total_swap_pages;
67
68 --- a/include/linux/mman.h
69 +++ b/include/linux/mman.h
70 @@ -12,21 +12,18 @@
71
72 #ifdef __KERNEL__
73 #include <linux/mm.h>
74 +#include <linux/percpu_counter.h>
75
76 #include <asm/atomic.h>
77
78 extern int sysctl_overcommit_memory;
79 extern int sysctl_overcommit_ratio;
80 -extern atomic_long_t vm_committed_space;
81 +extern struct percpu_counter vm_committed_as;
82
83 -#ifdef CONFIG_SMP
84 -extern void vm_acct_memory(long pages);
85 -#else
86 static inline void vm_acct_memory(long pages)
87 {
88 - atomic_long_add(pages, &vm_committed_space);
89 + percpu_counter_add(&vm_committed_as, pages);
90 }
91 -#endif
92
93 static inline void vm_unacct_memory(long pages)
94 {
95 --- a/kernel/fork.c
96 +++ b/kernel/fork.c
97 @@ -1442,6 +1442,8 @@ void __init proc_caches_init(void)
98 mm_cachep = kmem_cache_create("mm_struct",
99 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
100 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
101 + if (percpu_counter_init(&vm_committed_as, 0))
102 + panic("Failed to allocate vm_committed_as");
103 }
104
105 /*
106 --- a/mm/mmap.c
107 +++ b/mm/mmap.c
108 @@ -84,7 +84,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
109 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
110 int sysctl_overcommit_ratio = 50; /* default is 50% */
111 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
112 -atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
113 +struct percpu_counter vm_committed_as;
114 int heap_stack_gap __read_mostly = 1;
115
116 /*
117 @@ -178,11 +178,7 @@ int __vm_enough_memory(struct mm_struct
118 leave 3% of the size of this process for other processes */
119 allowed -= mm->total_vm / 32;
120
121 - /*
122 - * cast `allowed' as a signed long because vm_committed_space
123 - * sometimes has a negative value
124 - */
125 - if (atomic_long_read(&vm_committed_space) < (long)allowed)
126 + if (percpu_counter_read_positive(&vm_committed_as) < allowed)
127 return 0;
128 error:
129 vm_unacct_memory(pages);
130 --- a/mm/nommu.c
131 +++ b/mm/nommu.c
132 @@ -39,7 +39,7 @@ struct page *mem_map;
133 unsigned long max_mapnr;
134 unsigned long num_physpages;
135 unsigned long askedalloc, realalloc;
136 -atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
137 +struct percpu_counter vm_committed_as;
138 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
139 int sysctl_overcommit_ratio = 50; /* default is 50% */
140 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
141 @@ -1434,12 +1434,9 @@ int __vm_enough_memory(struct mm_struct
142 leave 3% of the size of this process for other processes */
143 allowed -= current->mm->total_vm / 32;
144
145 - /*
146 - * cast `allowed' as a signed long because vm_committed_space
147 - * sometimes has a negative value
148 - */
149 - if (atomic_long_read(&vm_committed_space) < (long)allowed)
150 + if (percpu_counter_read_positive(&vm_committed_as) < allowed)
151 return 0;
152 +
153 error:
154 vm_unacct_memory(pages);
155
156 --- a/mm/swap.c
157 +++ b/mm/swap.c
158 @@ -474,49 +474,6 @@ unsigned pagevec_lookup_tag(struct pagev
159
160 EXPORT_SYMBOL(pagevec_lookup_tag);
161
162 -#ifdef CONFIG_SMP
163 -/*
164 - * We tolerate a little inaccuracy to avoid ping-ponging the counter between
165 - * CPUs
166 - */
167 -#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
168 -
169 -static DEFINE_PER_CPU(long, committed_space);
170 -
171 -void vm_acct_memory(long pages)
172 -{
173 - long *local;
174 -
175 - preempt_disable();
176 - local = &__get_cpu_var(committed_space);
177 - *local += pages;
178 - if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
179 - atomic_long_add(*local, &vm_committed_space);
180 - *local = 0;
181 - }
182 - preempt_enable();
183 -}
184 -
185 -#ifdef CONFIG_HOTPLUG_CPU
186 -
187 -/* Drop the CPU's cached committed space back into the central pool. */
188 -static int cpu_swap_callback(struct notifier_block *nfb,
189 - unsigned long action,
190 - void *hcpu)
191 -{
192 - long *committed;
193 -
194 - committed = &per_cpu(committed_space, (long)hcpu);
195 - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
196 - atomic_long_add(*committed, &vm_committed_space);
197 - *committed = 0;
198 - drain_cpu_pagevecs((long)hcpu);
199 - }
200 - return NOTIFY_OK;
201 -}
202 -#endif /* CONFIG_HOTPLUG_CPU */
203 -#endif /* CONFIG_SMP */
204 -
205 /*
206 * Perform any setup for the swap system
207 */
208 @@ -537,7 +494,4 @@ void __init swap_setup(void)
209 * Right now other parts of the system means that we
210 * _really_ don't want to cluster much more
211 */
212 -#ifdef CONFIG_HOTPLUG_CPU
213 - hotcpu_notifier(cpu_swap_callback, 0);
214 -#endif
215 }