1 From: Peter Zijlstra <a.p.zijlstra@chello.nl>
2 Subject: mm: memory reserve management
3 Patch-mainline: No
4 References: FATE#303834
5
6 Generic reserve management code.
7
8 It provides methods to reserve and charge. On top of this, generic alloc/free style
9 reserve pools could be built, which could fully replace mempool_t
10 functionality.
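
As a rough usage sketch (the reserve name, the 16 page size and the
my_reserve/my_reserve_setup/my_reserve_get identifiers below are purely
illustrative, not part of this patch), a user creates a reserve, connects it
into the tree, sizes it, and then charges/uncharges usage against it:

	#include <linux/reserve.h>

	static struct mem_reserve my_reserve;

	static int my_reserve_setup(void)
	{
		int err;

		/* create the reserve and hook it into the global reserve tree */
		mem_reserve_init(&my_reserve, "my subsystem reserve", NULL);
		err = mem_reserve_connect(&my_reserve, &mem_reserve_root);
		if (err)
			return err;

		/* back it with 16 pages; on failure the old size is kept */
		err = mem_reserve_pages_set(&my_reserve, 16);
		if (err) {
			mem_reserve_disconnect(&my_reserve);
			return err;
		}

		return 0;
	}

	/* charge a page of usage before dipping into the reserve ... */
	static int my_reserve_get(void)
	{
		if (!mem_reserve_pages_charge(&my_reserve, 1))
			return -ENOMEM;
		return 0;
	}

	/* ... and uncharge it again once the page has been given back */
	static void my_reserve_put(void)
	{
		mem_reserve_pages_charge(&my_reserve, -1);
	}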
11
12 It should also allow for a Banker's algorithm replacement of __GFP_NOFAIL.
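
For example, a caller that would otherwise want __GFP_NOFAIL could use the
kmalloc_reserve()/kfree_reserve() wrappers added below and fall back on a
reserve; with __GFP_WAIT set they block until the reserve can be charged
instead of failing outright. Again only an illustrative sketch: struct foo,
foo_alloc()/foo_free() and the reserve sizing are made up, and only callers
entitled to the emergency reserves (see gfp_to_alloc_flags()) will actually
dip into them:

	/*
	 * The reserve is assumed to have been set up as above and sized for
	 * kmalloc usage, e.g.:
	 *
	 *	mem_reserve_kmalloc_set(&my_reserve, 32 * sizeof(struct foo));
	 */

	struct foo {
		int emerg;	/* set when the object came out of the reserve */
		/* ... payload ... */
	};

	static struct foo *foo_alloc(gfp_t gfp)
	{
		struct foo *foo;
		int emerg = 0;	/* only written to on emergency allocations */

		foo = kmalloc_reserve(sizeof(*foo), gfp, -1 /* any node */,
				      &my_reserve, &emerg);
		if (!foo)
			return NULL;

		foo->emerg = emerg;
		return foo;
	}

	static void foo_free(struct foo *foo)
	{
		/* uncharges the reserve again for emergency allocations */
		kfree_reserve(foo, &my_reserve, foo->emerg);
	}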
13
14 Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
15 Acked-by: Neil Brown <neilb@suse.de>
16 Acked-by: Suresh Jayaraman <sjayaraman@suse.de>
17
18 ---
19 include/linux/reserve.h | 198 ++++++++++++++
20 include/linux/slab.h | 20 -
21 mm/Makefile | 2
22 mm/reserve.c | 637 ++++++++++++++++++++++++++++++++++++++++++++++++
23 mm/slub.c | 2
24 5 files changed, 848 insertions(+), 11 deletions(-)
25
26 Index: linux-2.6.27/include/linux/reserve.h
27 ===================================================================
28 --- /dev/null
29 +++ linux-2.6.27/include/linux/reserve.h
30 @@ -0,0 +1,198 @@
31 +/*
32 + * Memory reserve management.
33 + *
34 + * Copyright (C) 2007-2008 Red Hat, Inc.,
35 + * Peter Zijlstra <pzijlstr@redhat.com>
36 + *
37 + * This file contains the public data structure and API definitions.
38 + */
39 +
40 +#ifndef _LINUX_RESERVE_H
41 +#define _LINUX_RESERVE_H
42 +
43 +#include <linux/list.h>
44 +#include <linux/spinlock.h>
45 +#include <linux/wait.h>
46 +#include <linux/slab.h>
47 +
48 +struct mem_reserve {
49 + struct mem_reserve *parent;
50 + struct list_head children;
51 + struct list_head siblings;
52 +
53 + const char *name;
54 +
55 + long pages;
56 + long limit;
57 + long usage;
58 + spinlock_t lock; /* protects limit and usage */
59 +
60 + wait_queue_head_t waitqueue;
61 +};
62 +
63 +extern struct mem_reserve mem_reserve_root;
64 +
65 +void mem_reserve_init(struct mem_reserve *res, const char *name,
66 + struct mem_reserve *parent);
67 +int mem_reserve_connect(struct mem_reserve *new_child,
68 + struct mem_reserve *node);
69 +void mem_reserve_disconnect(struct mem_reserve *node);
70 +
71 +int mem_reserve_pages_set(struct mem_reserve *res, long pages);
72 +int mem_reserve_pages_add(struct mem_reserve *res, long pages);
73 +int mem_reserve_pages_charge(struct mem_reserve *res, long pages);
74 +
75 +int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes);
76 +int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes);
77 +
78 +struct kmem_cache;
79 +
80 +int mem_reserve_kmem_cache_set(struct mem_reserve *res,
81 + struct kmem_cache *s,
82 + int objects);
83 +int mem_reserve_kmem_cache_charge(struct mem_reserve *res,
84 + struct kmem_cache *s, long objs);
85 +
86 +void *___kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
87 + struct mem_reserve *res, int *emerg);
88 +
89 +static inline
90 +void *__kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
91 + struct mem_reserve *res, int *emerg)
92 +{
93 + void *obj;
94 +
95 + obj = __kmalloc_node_track_caller(size,
96 + flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node, ip);
97 + if (!obj)
98 + obj = ___kmalloc_reserve(size, flags, node, ip, res, emerg);
99 +
100 + return obj;
101 +}
102 +
103 +/**
104 + * kmalloc_reserve() - kmalloc() and charge against @res for @emerg allocations
105 + * @size - size of the requested memory region
106 + * @gfp - allocation flags to use for this allocation
107 + * @node - preferred memory node for this allocation
108 + * @res - reserve to charge emergency allocations against
109 + * @emerg - bit 0 is set when the allocation was an emergency allocation
110 + *
111 + * Returns NULL on failure
112 + */
113 +#define kmalloc_reserve(size, gfp, node, res, emerg) \
114 + __kmalloc_reserve(size, gfp, node, \
115 + __builtin_return_address(0), res, emerg)
116 +
117 +void __kfree_reserve(void *obj, struct mem_reserve *res, int emerg);
118 +
119 +/**
120 + * kfree_reserve() - kfree() and uncharge against @res for @emerg allocations
121 + * @obj - memory to free
122 + * @res - reserve to uncharge emergency allocations from
123 + * @emerg - was this an emergency allocation
124 + */
125 +static inline
126 +void kfree_reserve(void *obj, struct mem_reserve *res, int emerg)
127 +{
128 + if (unlikely(obj && res && emerg))
129 + __kfree_reserve(obj, res, emerg);
130 + else
131 + kfree(obj);
132 +}
133 +
134 +void *__kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
135 + struct mem_reserve *res, int *emerg);
136 +
137 +/**
138 + * kmem_cache_alloc_reserve() - kmem_cache_alloc() and charge against @res
139 + * @s - kmem_cache to allocate from
140 + * @gfp - allocation flags to use for this allocation
141 + * @node - preferred memory node for this allocation
142 + * @res - reserve to charge emergency allocations against
143 + * @emerg - bit 0 is set when the allocation was an emergency allocation
144 + *
145 + * Returns NULL on failure
146 + */
147 +static inline
148 +void *kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
149 + struct mem_reserve *res, int *emerg)
150 +{
151 + void *obj;
152 +
153 + obj = kmem_cache_alloc_node(s,
154 + flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node);
155 + if (!obj)
156 + obj = __kmem_cache_alloc_reserve(s, flags, node, res, emerg);
157 +
158 + return obj;
159 +}
160 +
161 +void __kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
162 + struct mem_reserve *res, int emerg);
163 +
164 +/**
165 + * kmem_cache_free_reserve() - kmem_cache_free() and uncharge against @res
166 + * @s - kmem_cache to free to
167 + * @obj - memory to free
168 + * @res - reserve to uncharge emergency allocations from
169 + * @emerg - was this an emergency allocation
170 + */
171 +static inline
172 +void kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
173 + struct mem_reserve *res, int emerg)
174 +{
175 + if (unlikely(obj && res && emerg))
176 + __kmem_cache_free_reserve(s, obj, res, emerg);
177 + else
178 + kmem_cache_free(s, obj);
179 +}
180 +
181 +struct page *__alloc_pages_reserve(int node, gfp_t flags, int order,
182 + struct mem_reserve *res, int *emerg);
183 +
184 +/**
185 + * alloc_pages_reserve() - alloc_pages() and charge against @res
186 + * @node - preferred memory node for this allocation
187 + * @gfp - allocation flags to use for this allocation
188 + * @order - page order
189 + * @res - reserve to charge emergency allocations against
190 + * @emerg - bit 0 is set when the allocation was an emergency allocation
191 + *
192 + * Returns NULL on failure
193 + */
194 +static inline
195 +struct page *alloc_pages_reserve(int node, gfp_t flags, int order,
196 + struct mem_reserve *res, int *emerg)
197 +{
198 + struct page *page;
199 +
200 + page = alloc_pages_node(node,
201 + flags | __GFP_NOMEMALLOC | __GFP_NOWARN, order);
202 + if (!page)
203 + page = __alloc_pages_reserve(node, flags, order, res, emerg);
204 +
205 + return page;
206 +}
207 +
208 +void __free_pages_reserve(struct page *page, int order,
209 + struct mem_reserve *res, int emerg);
210 +
211 +/**
212 + * free_pages_reserve() - __free_pages() and uncharge against @res
213 + * @page - page to free
214 + * @order - page order
215 + * @res - reserve to uncharge emergency allocations from
216 + * @emerg - was this an emergency allocation
217 + */
218 +static inline
219 +void free_pages_reserve(struct page *page, int order,
220 + struct mem_reserve *res, int emerg)
221 +{
222 + if (unlikely(page && res && emerg))
223 + __free_pages_reserve(page, order, res, emerg);
224 + else
225 + __free_pages(page, order);
226 +}
227 +
228 +#endif /* _LINUX_RESERVE_H */
229 Index: linux-2.6.27/mm/Makefile
230 ===================================================================
231 --- linux-2.6.27.orig/mm/Makefile
232 +++ linux-2.6.27/mm/Makefile
233 @@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o
234 maccess.o page_alloc.o page-writeback.o pdflush.o \
235 readahead.o swap.o truncate.o vmscan.o \
236 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
237 - page_isolation.o mm_init.o $(mmu-y)
238 + page_isolation.o mm_init.o reserve.o $(mmu-y)
239
240 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
241 obj-$(CONFIG_BOUNCE) += bounce.o
242 Index: linux-2.6.27/mm/reserve.c
243 ===================================================================
244 --- /dev/null
245 +++ linux-2.6.27/mm/reserve.c
246 @@ -0,0 +1,637 @@
247 +/*
248 + * Memory reserve management.
249 + *
250 + * Copyright (C) 2007-2008, Red Hat, Inc.,
251 + * Peter Zijlstra <pzijlstr@redhat.com>
252 + *
253 + * Description:
254 + *
255 + * Manage a set of memory reserves.
256 + *
257 + * A memory reserve is a reserve for a specified number of objects of a specified
258 + * size. Since memory is managed in pages, this reserve demand is then
259 + * translated into a page unit.
260 + *
261 + * So each reserve has a specified object limit, an object usage count and a
262 + * number of pages required to back these objects.
263 + *
264 + * Usage is charged against a reserve; if the charge fails, the resource must
265 + * not be allocated/used.
266 + *
267 + * The reserves are managed in a tree, and the resource demands (pages and
268 + * limit) are propagated up the tree. Obviously the object limit will be
269 + * meaningless as soon as the units start mixing, but the required page reserve
270 + * (being of one unit) is still valid at the root.
271 + *
272 + * It is the page demand of the root node that is used to set the global
273 + * reserve (adjust_memalloc_reserve() which sets zone->pages_emerg).
274 + *
275 + * As long as a subtree has the same usage unit, an aggregate node can be used
276 + * to charge against, instead of the leaf nodes. However, do be consistent with
277 + * who is charged: resource usage is not propagated up the tree (for
278 + * performance reasons).
279 + */
280 +
281 +#include <linux/reserve.h>
282 +#include <linux/mutex.h>
283 +#include <linux/mmzone.h>
284 +#include <linux/log2.h>
285 +#include <linux/proc_fs.h>
286 +#include <linux/seq_file.h>
287 +#include <linux/module.h>
288 +#include <linux/slab.h>
289 +#include <linux/sched.h>
290 +#include "internal.h"
291 +
292 +static DEFINE_MUTEX(mem_reserve_mutex);
293 +
294 +/**
295 + * @mem_reserve_root - the global reserve root
296 + *
297 + * The global reserve is empty and has no limit or unit; it merely
298 + * acts as an aggregation point for reserves and an interface to
299 + * adjust_memalloc_reserve().
300 + */
301 +struct mem_reserve mem_reserve_root = {
302 + .children = LIST_HEAD_INIT(mem_reserve_root.children),
303 + .siblings = LIST_HEAD_INIT(mem_reserve_root.siblings),
304 + .name = "total reserve",
305 + .lock = __SPIN_LOCK_UNLOCKED(mem_reserve_root.lock),
306 + .waitqueue = __WAIT_QUEUE_HEAD_INITIALIZER(mem_reserve_root.waitqueue),
307 +};
308 +EXPORT_SYMBOL_GPL(mem_reserve_root);
309 +
310 +/**
311 + * mem_reserve_init() - initialize a memory reserve object
312 + * @res - the new reserve object
313 + * @name - a name for this reserve
314 + * @parent - when non-NULL, the parent to connect to.
315 + */
316 +void mem_reserve_init(struct mem_reserve *res, const char *name,
317 + struct mem_reserve *parent)
318 +{
319 + memset(res, 0, sizeof(*res));
320 + INIT_LIST_HEAD(&res->children);
321 + INIT_LIST_HEAD(&res->siblings);
322 + res->name = name;
323 + spin_lock_init(&res->lock);
324 + init_waitqueue_head(&res->waitqueue);
325 +
326 + if (parent)
327 + mem_reserve_connect(res, parent);
328 +}
329 +EXPORT_SYMBOL_GPL(mem_reserve_init);
330 +
331 +/*
332 + * propagate the pages and limit changes up the (sub)tree.
333 + */
334 +static void __calc_reserve(struct mem_reserve *res, long pages, long limit)
335 +{
336 + unsigned long flags;
337 +
338 + for ( ; res; res = res->parent) {
339 + res->pages += pages;
340 +
341 + if (limit) {
342 + spin_lock_irqsave(&res->lock, flags);
343 + res->limit += limit;
344 + spin_unlock_irqrestore(&res->lock, flags);
345 + }
346 + }
347 +}
348 +
349 +/**
350 + * __mem_reserve_add() - primitive to change the size of a reserve
351 + * @res - reserve to change
352 + * @pages - page delta
353 + * @limit - usage limit delta
354 + *
355 + * Returns -ENOMEM when a size increase is not possible at the moment.
356 + */
357 +static int __mem_reserve_add(struct mem_reserve *res, long pages, long limit)
358 +{
359 + int ret = 0;
360 + long reserve;
361 +
362 + /*
363 + * This looks more complex than it needs to be because we handle
364 + * the case where @res isn't actually connected to mem_reserve_root.
365 + *
366 + * So, by propagating the new pages up the (sub)tree and computing
367 + * the difference in mem_reserve_root.pages we find if this action
368 + * affects the actual reserve.
369 + *
370 + * The (partial) propagation also means that mem_reserve_connect()
371 + * need only look at the direct child, since each disconnected
372 + * sub-tree is fully up-to-date.
373 + */
374 + reserve = mem_reserve_root.pages;
375 + __calc_reserve(res, pages, 0);
376 + reserve = mem_reserve_root.pages - reserve;
377 +
378 + if (reserve) {
379 + ret = adjust_memalloc_reserve(reserve);
380 + if (ret)
381 + __calc_reserve(res, -pages, 0);
382 + }
383 +
384 + /*
385 + * Delay updating the limits until we've acquired the resources to
386 + * back it.
387 + */
388 + if (!ret)
389 + __calc_reserve(res, 0, limit);
390 +
391 + return ret;
392 +}
393 +
394 +/**
395 + * __mem_reserve_charge() - primitive to charge object usage of a reserve
396 + * @res - reserve to charge
397 + * @charge - size of the charge
398 + *
399 + * Returns non-zero on success, zero on failure.
400 + */
401 +static
402 +int __mem_reserve_charge(struct mem_reserve *res, long charge)
403 +{
404 + unsigned long flags;
405 + int ret = 0;
406 +
407 + spin_lock_irqsave(&res->lock, flags);
408 + if (charge < 0 || res->usage + charge < res->limit) {
409 + res->usage += charge;
410 + if (unlikely(res->usage < 0))
411 + res->usage = 0;
412 + ret = 1;
413 + }
414 + if (charge < 0)
415 + wake_up_all(&res->waitqueue);
416 + spin_unlock_irqrestore(&res->lock, flags);
417 +
418 + return ret;
419 +}
420 +
421 +/**
422 + * mem_reserve_connect() - connect a reserve to another in a child-parent relation
423 + * @new_child - the reserve node to connect (child)
424 + * @node - the reserve node to connect to (parent)
425 + *
426 + * Connecting a node results in an increase of the reserve by the number of
427 + * pages in @new_child->pages if @node has a connection to mem_reserve_root.
428 + *
429 + * Returns -ENOMEM when the new connection would increase the reserve (parent
430 + * is connected to mem_reserve_root) and there is no memory to do so.
431 + *
432 + * On error, the child is _NOT_ connected.
433 + */
434 +int mem_reserve_connect(struct mem_reserve *new_child, struct mem_reserve *node)
435 +{
436 + int ret;
437 +
438 + WARN_ON(!new_child->name);
439 +
440 + mutex_lock(&mem_reserve_mutex);
441 + if (new_child->parent) {
442 + ret = -EEXIST;
443 + goto unlock;
444 + }
445 + new_child->parent = node;
446 + list_add(&new_child->siblings, &node->children);
447 + ret = __mem_reserve_add(node, new_child->pages, new_child->limit);
448 + if (ret) {
449 + new_child->parent = NULL;
450 + list_del_init(&new_child->siblings);
451 + }
452 +unlock:
453 + mutex_unlock(&mem_reserve_mutex);
454 +
455 + return ret;
456 +}
457 +EXPORT_SYMBOL_GPL(mem_reserve_connect);
458 +
459 +/**
460 + * mem_reserve_disconnect() - sever a node's connection to the reserve tree
461 + * @node - the node to disconnect
462 + *
463 + * Disconnecting a node results in a reduction of the reserve by @node->pages
464 + * if @node had a connection to mem_reserve_root.
465 + */
466 +void mem_reserve_disconnect(struct mem_reserve *node)
467 +{
468 + int ret;
469 +
470 + BUG_ON(!node->parent);
471 +
472 + mutex_lock(&mem_reserve_mutex);
473 + if (!node->parent) {
474 + ret = -ENOENT;
475 + goto unlock;
476 + }
477 + ret = __mem_reserve_add(node->parent, -node->pages, -node->limit);
478 + if (!ret) {
479 + node->parent = NULL;
480 + list_del_init(&node->siblings);
481 + }
482 +unlock:
483 + mutex_unlock(&mem_reserve_mutex);
484 +
485 + /*
486 + * We cannot fail to shrink the reserves, can we?
487 + */
488 + WARN_ON(ret);
489 +}
490 +EXPORT_SYMBOL_GPL(mem_reserve_disconnect);
491 +
492 +#ifdef CONFIG_PROC_FS
493 +
494 +/*
495 + * Simple output of the reserve tree in: /proc/reserve_info
496 + * Example:
497 + *
498 + * localhost ~ # cat /proc/reserve_info
499 + * 1:0 "total reserve" 6232K 0/278581
500 + * 2:1 "total network reserve" 6232K 0/278581
501 + * 3:2 "network TX reserve" 212K 0/53
502 + * 4:3 "protocol TX pages" 212K 0/53
503 + * 5:2 "network RX reserve" 6020K 0/278528
504 + * 6:5 "IPv4 route cache" 5508K 0/16384
505 + * 7:5 "SKB data reserve" 512K 0/262144
506 + * 8:7 "IPv4 fragment cache" 512K 0/262144
507 + */
508 +
509 +static void mem_reserve_show_item(struct seq_file *m, struct mem_reserve *res,
510 + unsigned int parent, unsigned int *id)
511 +{
512 + struct mem_reserve *child;
513 + unsigned int my_id = ++*id;
514 +
515 + seq_printf(m, "%d:%d \"%s\" %ldK %ld/%ld\n",
516 + my_id, parent, res->name,
517 + res->pages << (PAGE_SHIFT - 10),
518 + res->usage, res->limit);
519 +
520 + list_for_each_entry(child, &res->children, siblings)
521 + mem_reserve_show_item(m, child, my_id, id);
522 +}
523 +
524 +static int mem_reserve_show(struct seq_file *m, void *v)
525 +{
526 + unsigned int ident = 0;
527 +
528 + mutex_lock(&mem_reserve_mutex);
529 + mem_reserve_show_item(m, &mem_reserve_root, ident, &ident);
530 + mutex_unlock(&mem_reserve_mutex);
531 +
532 + return 0;
533 +}
534 +
535 +static int mem_reserve_open(struct inode *inode, struct file *file)
536 +{
537 + return single_open(file, mem_reserve_show, NULL);
538 +}
539 +
540 +static const struct file_operations mem_reserve_operations = {
541 + .open = mem_reserve_open,
542 + .read = seq_read,
543 + .llseek = seq_lseek,
544 + .release = single_release,
545 +};
546 +
547 +static __init int mem_reserve_proc_init(void)
548 +{
549 + proc_create("reserve_info", S_IRUSR, NULL, &mem_reserve_operations);
550 + return 0;
551 +}
552 +
553 +module_init(mem_reserve_proc_init);
554 +
555 +#endif
556 +
557 +/*
558 + * alloc_page helpers
559 + */
560 +
561 +/**
562 + * mem_reserve_pages_set() - set a reserve's size in pages
563 + * @res - reserve to set
564 + * @pages - size in pages to set it to
565 + *
566 + * Returns -ENOMEM when it fails to set the reserve. On failure the old size
567 + * is preserved.
568 + */
569 +int mem_reserve_pages_set(struct mem_reserve *res, long pages)
570 +{
571 + int ret;
572 +
573 + mutex_lock(&mem_reserve_mutex);
574 + pages -= res->pages;
575 + ret = __mem_reserve_add(res, pages, pages * PAGE_SIZE);
576 + mutex_unlock(&mem_reserve_mutex);
577 +
578 + return ret;
579 +}
580 +EXPORT_SYMBOL_GPL(mem_reserve_pages_set);
581 +
582 +/**
583 + * mem_reserve_pages_add() - change the size in a relative way
584 + * @res - reserve to change
585 + * @pages - number of pages to add (or subtract when negative)
586 + *
587 + * Similar to mem_reserve_pages_set, except that the argument is relative
588 + * instead of absolute.
589 + *
590 + * Returns -ENOMEM when it fails to increase.
591 + */
592 +int mem_reserve_pages_add(struct mem_reserve *res, long pages)
593 +{
594 + int ret;
595 +
596 + mutex_lock(&mem_reserve_mutex);
597 + ret = __mem_reserve_add(res, pages, pages * PAGE_SIZE);
598 + mutex_unlock(&mem_reserve_mutex);
599 +
600 + return ret;
601 +}
602 +
603 +/**
604 + * mem_reserve_pages_charge() - charge page usage to a reserve
605 + * @res - reserve to charge
606 + * @pages - size to charge
607 + *
608 + * Returns non-zero on success.
609 + */
610 +int mem_reserve_pages_charge(struct mem_reserve *res, long pages)
611 +{
612 + return __mem_reserve_charge(res, pages * PAGE_SIZE);
613 +}
614 +EXPORT_SYMBOL_GPL(mem_reserve_pages_charge);
615 +
616 +/*
617 + * kmalloc helpers
618 + */
619 +
620 +/**
621 + * mem_reserve_kmalloc_set() - set this reserve to @bytes worth of kmalloc
622 + * @res - reserve to change
623 + * @bytes - size in bytes to reserve
624 + *
625 + * Returns -ENOMEM on failure.
626 + */
627 +int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes)
628 +{
629 + int ret;
630 + long pages;
631 +
632 + mutex_lock(&mem_reserve_mutex);
633 + pages = kmalloc_estimate_bytes(GFP_ATOMIC, bytes);
634 + pages -= res->pages;
635 + bytes -= res->limit;
636 + ret = __mem_reserve_add(res, pages, bytes);
637 + mutex_unlock(&mem_reserve_mutex);
638 +
639 + return ret;
640 +}
641 +EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_set);
642 +
643 +/**
644 + * mem_reserve_kmalloc_charge() - charge bytes to a reserve
645 + * @res - reserve to charge
646 + * @bytes - bytes to charge
647 + *
648 + * Returns non-zero on success.
649 + */
650 +int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes)
651 +{
652 + if (bytes < 0)
653 + bytes = -roundup_pow_of_two(-bytes);
654 + else
655 + bytes = roundup_pow_of_two(bytes);
656 +
657 + return __mem_reserve_charge(res, bytes);
658 +}
659 +EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_charge);
660 +
661 +/*
662 + * kmem_cache helpers
663 + */
664 +
665 +/**
666 + * mem_reserve_kmem_cache_set() - set reserve to @objects worth of kmem_cache_alloc of @s
667 + * @res - reserve to set
668 + * @s - kmem_cache to reserve from
669 + * @objects - number of objects to reserve
670 + *
671 + * Returns -ENOMEM on failure.
672 + */
673 +int mem_reserve_kmem_cache_set(struct mem_reserve *res, struct kmem_cache *s,
674 + int objects)
675 +{
676 + int ret;
677 + long pages, bytes;
678 +
679 + mutex_lock(&mem_reserve_mutex);
680 + pages = kmem_alloc_estimate(s, GFP_ATOMIC, objects);
681 + pages -= res->pages;
682 + bytes = objects * kmem_cache_size(s) - res->limit;
683 + ret = __mem_reserve_add(res, pages, bytes);
684 + mutex_unlock(&mem_reserve_mutex);
685 +
686 + return ret;
687 +}
688 +EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_set);
689 +
690 +/**
691 + * mem_reserve_kmem_cache_charge() - charge (or uncharge) usage of @objs
692 + * @res - reserve to charge
693 + * @objs - objects to charge for
694 + *
695 + * Returns non-zero on success.
696 + */
697 +int mem_reserve_kmem_cache_charge(struct mem_reserve *res, struct kmem_cache *s,
698 + long objs)
699 +{
700 + return __mem_reserve_charge(res, objs * kmem_cache_size(s));
701 +}
702 +EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_charge);
703 +
704 +/*
705 + * Alloc wrappers.
706 + *
707 + * Actual usage is commented in linux/reserve.h where the interface functions
708 + * live. Furthermore, the code is 3 instances of the same paradigm, hence only
709 + * the first contains extensive comments.
710 + */
711 +
712 +/*
713 + * kmalloc/kfree
714 + */
715 +
716 +void *___kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
717 + struct mem_reserve *res, int *emerg)
718 +{
719 + void *obj;
720 + gfp_t gfp;
721 +
722 + /*
723 + * Try a regular allocation; when that fails and we're not entitled
724 + * to the reserves, fail.
725 + */
726 + gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
727 + obj = __kmalloc_node_track_caller(size, gfp, node, ip);
728 +
729 + if (obj || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
730 + goto out;
731 +
732 + /*
733 + * If we were given a reserve to charge against, try that.
734 + */
735 + if (res && !mem_reserve_kmalloc_charge(res, size)) {
736 + /*
737 + * If we failed to charge and we're not allowed to wait for
738 + * it to succeed, bail.
739 + */
740 + if (!(flags & __GFP_WAIT))
741 + goto out;
742 +
743 + /*
744 + * Wait for a successful charge against the reserve. All
745 + * uncharge operations against this reserve will wake us up.
746 + */
747 + wait_event(res->waitqueue,
748 + mem_reserve_kmalloc_charge(res, size));
749 +
750 + /*
751 + * After waiting for it, again try a regular allocation.
752 + * Pressure could have lifted during our sleep. If this
753 + * succeeds, uncharge the reserve.
754 + */
755 + obj = __kmalloc_node_track_caller(size, gfp, node, ip);
756 + if (obj) {
757 + mem_reserve_kmalloc_charge(res, -size);
758 + goto out;
759 + }
760 + }
761 +
762 + /*
763 + * Regular allocation failed, and we've successfully charged our
764 + * requested usage against the reserve. Do the emergency allocation.
765 + */
766 + obj = __kmalloc_node_track_caller(size, flags, node, ip);
767 + WARN_ON(!obj);
768 + if (emerg)
769 + *emerg = 1;
770 +
771 +out:
772 + return obj;
773 +}
774 +
775 +void __kfree_reserve(void *obj, struct mem_reserve *res, int emerg)
776 +{
777 + /*
778 + * ksize gives the full allocated size vs the requested size we used to
779 + * charge; however since we round up to the nearest power of two, this
780 + * should all work nicely.
781 + */
782 + size_t size = ksize(obj);
783 +
784 + kfree(obj);
785 + /*
786 + * Free before uncharge, this ensures memory is actually present when
787 + * a subsequent charge succeeds.
788 + */
789 + mem_reserve_kmalloc_charge(res, -size);
790 +}
791 +
792 +/*
793 + * kmem_cache_alloc/kmem_cache_free
794 + */
795 +
796 +void *__kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
797 + struct mem_reserve *res, int *emerg)
798 +{
799 + void *obj;
800 + gfp_t gfp;
801 +
802 + gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
803 + obj = kmem_cache_alloc_node(s, gfp, node);
804 +
805 + if (obj || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
806 + goto out;
807 +
808 + if (res && !mem_reserve_kmem_cache_charge(res, s, 1)) {
809 + if (!(flags & __GFP_WAIT))
810 + goto out;
811 +
812 + wait_event(res->waitqueue,
813 + mem_reserve_kmem_cache_charge(res, s, 1));
814 +
815 + obj = kmem_cache_alloc_node(s, gfp, node);
816 + if (obj) {
817 + mem_reserve_kmem_cache_charge(res, s, -1);
818 + goto out;
819 + }
820 + }
821 +
822 + obj = kmem_cache_alloc_node(s, flags, node);
823 + WARN_ON(!obj);
824 + if (emerg)
825 + *emerg = 1;
826 +
827 +out:
828 + return obj;
829 +}
830 +
831 +void __kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
832 + struct mem_reserve *res, int emerg)
833 +{
834 + kmem_cache_free(s, obj);
835 + mem_reserve_kmem_cache_charge(res, s, -1);
836 +}
837 +
838 +/*
839 + * alloc_pages/free_pages
840 + */
841 +
842 +struct page *__alloc_pages_reserve(int node, gfp_t flags, int order,
843 + struct mem_reserve *res, int *emerg)
844 +{
845 + struct page *page;
846 + gfp_t gfp;
847 + long pages = 1 << order;
848 +
849 + gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
850 + page = alloc_pages_node(node, gfp, order);
851 +
852 + if (page || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
853 + goto out;
854 +
855 + if (res && !mem_reserve_pages_charge(res, pages)) {
856 + if (!(flags & __GFP_WAIT))
857 + goto out;
858 +
859 + wait_event(res->waitqueue,
860 + mem_reserve_pages_charge(res, pages));
861 +
862 + page = alloc_pages_node(node, gfp, order);
863 + if (page) {
864 + mem_reserve_pages_charge(res, -pages);
865 + goto out;
866 + }
867 + }
868 +
869 + page = alloc_pages_node(node, flags, order);
870 + WARN_ON(!page);
871 + if (emerg)
872 + *emerg = 1;
873 +
874 +out:
875 + return page;
876 +}
877 +
878 +void __free_pages_reserve(struct page *page, int order,
879 + struct mem_reserve *res, int emerg)
880 +{
881 + __free_pages(page, order);
882 + mem_reserve_pages_charge(res, -(1 << order));
883 +}
884 Index: linux-2.6.27/include/linux/slab.h
885 ===================================================================
886 --- linux-2.6.27.orig/include/linux/slab.h
887 +++ linux-2.6.27/include/linux/slab.h
888 @@ -230,13 +230,14 @@ static inline void *kmem_cache_alloc_nod
889 */
890 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
891 extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
892 -#define kmalloc_track_caller(size, flags) \
893 - __kmalloc_track_caller(size, flags, __builtin_return_address(0))
894 #else
895 -#define kmalloc_track_caller(size, flags) \
896 +#define __kmalloc_track_caller(size, flags, ip) \
897 __kmalloc(size, flags)
898 #endif /* DEBUG_SLAB */
899
900 +#define kmalloc_track_caller(size, flags) \
901 + __kmalloc_track_caller(size, flags, __builtin_return_address(0))
902 +
903 #ifdef CONFIG_NUMA
904 /*
905 * kmalloc_node_track_caller is a special version of kmalloc_node that
906 @@ -248,21 +249,22 @@ extern void *__kmalloc_track_caller(size
907 */
908 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
909 extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
910 -#define kmalloc_node_track_caller(size, flags, node) \
911 - __kmalloc_node_track_caller(size, flags, node, \
912 - __builtin_return_address(0))
913 #else
914 -#define kmalloc_node_track_caller(size, flags, node) \
915 +#define __kmalloc_node_track_caller(size, flags, node, ip) \
916 __kmalloc_node(size, flags, node)
917 #endif
918
919 #else /* CONFIG_NUMA */
920
921 -#define kmalloc_node_track_caller(size, flags, node) \
922 - kmalloc_track_caller(size, flags)
923 +#define __kmalloc_node_track_caller(size, flags, node, ip) \
924 + __kmalloc_track_caller(size, flags, ip)
925
926 #endif /* DEBUG_SLAB */
927
928 +#define kmalloc_node_track_caller(size, flags, node) \
929 + __kmalloc_node_track_caller(size, flags, node, \
930 + __builtin_return_address(0))
931 +
932 /*
933 * Shortcuts
934 */
935 Index: linux-2.6.27/mm/slub.c
936 ===================================================================
937 --- linux-2.6.27.orig/mm/slub.c
938 +++ linux-2.6.27/mm/slub.c
939 @@ -2726,6 +2726,7 @@ void *__kmalloc(size_t size, gfp_t flags
940 }
941 EXPORT_SYMBOL(__kmalloc);
942
943 +#ifdef CONFIG_NUMA
944 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
945 {
946 struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
947 @@ -2737,7 +2738,6 @@ static void *kmalloc_large_node(size_t s
948 return NULL;
949 }
950
951 -#ifdef CONFIG_NUMA
952 void *__kmalloc_node(size_t size, gfp_t flags, int node)
953 {
954 struct kmem_cache *s;