1 From: Peter Zijlstra <a.p.zijlstra@chello.nl>
2 Subject: mm: memory reserve management
4 References: FATE#303834
6 Generic reserve management code.
8 It provides methods to reserve and charge. Upon this, generic alloc/free style
9 reserve pools could be built, which could fully replace mempool_t
12 It should also allow for a Banker's algorithm replacement of __GFP_NOFAIL.
14 Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
15 Acked-by: Neil Brown <neilb@suse.de>
16 Acked-by: Suresh Jayaraman <sjayaraman@suse.de>
19 include/linux/reserve.h | 198 ++++++++++++++
20 include/linux/slab.h | 20 -
22 mm/reserve.c | 637 ++++++++++++++++++++++++++++++++++++++++++++++++
24 5 files changed, 848 insertions(+), 11 deletions(-)
27 +++ b/include/linux/reserve.h
30 + * Memory reserve management.
32 + * Copyright (C) 2007-2008 Red Hat, Inc.,
33 + * Peter Zijlstra <pzijlstr@redhat.com>
35 + * This file contains the public data structure and API definitions.
38 +#ifndef _LINUX_RESERVE_H
39 +#define _LINUX_RESERVE_H
41 +#include <linux/list.h>
42 +#include <linux/spinlock.h>
43 +#include <linux/wait.h>
44 +#include <linux/slab.h>
47 + struct mem_reserve *parent;
48 + struct list_head children;
49 + struct list_head siblings;
56 + spinlock_t lock; /* protects limit and usage */
58 + wait_queue_head_t waitqueue;
61 +extern struct mem_reserve mem_reserve_root;
63 +void mem_reserve_init(struct mem_reserve *res, const char *name,
64 + struct mem_reserve *parent);
65 +int mem_reserve_connect(struct mem_reserve *new_child,
66 + struct mem_reserve *node);
67 +void mem_reserve_disconnect(struct mem_reserve *node);
69 +int mem_reserve_pages_set(struct mem_reserve *res, long pages);
70 +int mem_reserve_pages_add(struct mem_reserve *res, long pages);
71 +int mem_reserve_pages_charge(struct mem_reserve *res, long pages);
73 +int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes);
74 +int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes);
78 +int mem_reserve_kmem_cache_set(struct mem_reserve *res,
79 + struct kmem_cache *s,
81 +int mem_reserve_kmem_cache_charge(struct mem_reserve *res,
82 + struct kmem_cache *s, long objs);
84 +void *___kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
85 + struct mem_reserve *res, int *emerg);
88 +void *__kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
89 + struct mem_reserve *res, int *emerg)
93 + obj = __kmalloc_node_track_caller(size,
94 + flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node, ip);
96 + obj = ___kmalloc_reserve(size, flags, node, ip, res, emerg);
102 + * kmalloc_reserve() - kmalloc() and charge against @res for @emerg allocations
103 + * @size - size of the requested memory region
104 + * @gfp - allocation flags to use for this allocation
105 + * @node - preferred memory node for this allocation
106 + * @res - reserve to charge emergency allocations against
107 + * @emerg - bit 0 is set when the allocation was an emergency allocation
109 + * Returns NULL on failure
111 +#define kmalloc_reserve(size, gfp, node, res, emerg) \
112 + __kmalloc_reserve(size, gfp, node, \
113 + __builtin_return_address(0), res, emerg)
115 +void __kfree_reserve(void *obj, struct mem_reserve *res, int emerg);
118 + * kfree_reserve() - kfree() and uncharge against @res for @emerg allocations
119 + * @obj - memory to free
120 + * @res - reserve to uncharge emergency allocations from
121 + * @emerg - was this an emergency allocation
124 +void kfree_reserve(void *obj, struct mem_reserve *res, int emerg)
126 + if (unlikely(obj && res && emerg))
127 + __kfree_reserve(obj, res, emerg);
132 +void *__kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
133 + struct mem_reserve *res, int *emerg);
136 + * kmem_cache_alloc_reserve() - kmem_cache_alloc() and charge against @res
137 + * @s - kmem_cache to allocate from
138 + * @gfp - allocation flags to use for this allocation
139 + * @node - preferred memory node for this allocation
140 + * @res - reserve to charge emergency allocations against
141 + * @emerg - bit 0 is set when the allocation was an emergency allocation
143 + * Returns NULL on failure
146 +void *kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
147 + struct mem_reserve *res, int *emerg)
151 + obj = kmem_cache_alloc_node(s,
152 + flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node);
154 + obj = __kmem_cache_alloc_reserve(s, flags, node, res, emerg);
159 +void __kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
160 + struct mem_reserve *res, int emerg);
163 + * kmem_cache_free_reserve() - kmem_cache_free() and uncharge against @res
164 + * @s - kmem_cache to free to
165 + * @obj - memory to free
166 + * @res - reserve to uncharge emergency allocations from
167 + * @emerg - was this an emergency allocation
170 +void kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
171 + struct mem_reserve *res, int emerg)
173 + if (unlikely(obj && res && emerg))
174 + __kmem_cache_free_reserve(s, obj, res, emerg);
176 + kmem_cache_free(s, obj);
179 +struct page *__alloc_pages_reserve(int node, gfp_t flags, int order,
180 + struct mem_reserve *res, int *emerg);
183 + * alloc_pages_reserve() - alloc_pages() and charge against @res
184 + * @node - preferred memory node for this allocation
185 + * @gfp - allocation flags to use for this allocation
186 + * @order - page order
187 + * @res - reserve to charge emergency allocations against
188 + * @emerg - bit 0 is set when the allocation was an emergency allocation
190 + * Returns NULL on failure
193 +struct page *alloc_pages_reserve(int node, gfp_t flags, int order,
194 + struct mem_reserve *res, int *emerg)
198 + page = alloc_pages_node(node,
199 + flags | __GFP_NOMEMALLOC | __GFP_NOWARN, order);
201 + page = __alloc_pages_reserve(node, flags, order, res, emerg);
206 +void __free_pages_reserve(struct page *page, int order,
207 + struct mem_reserve *res, int emerg);
210 + * free_pages_reserve() - __free_pages() and uncharge against @res
211 + * @page - page to free
212 + * @order - page order
213 + * @res - reserve to uncharge emergency allocations from
214 + * @emerg - was this an emergency allocation
217 +void free_pages_reserve(struct page *page, int order,
218 + struct mem_reserve *res, int emerg)
220 + if (unlikely(page && res && emerg))
221 + __free_pages_reserve(page, order, res, emerg);
223 + __free_pages(page, order);
226 +#endif /* _LINUX_RESERVE_H */
227 --- a/include/linux/slab.h
228 +++ b/include/linux/slab.h
229 @@ -230,13 +230,14 @@ static inline void *kmem_cache_alloc_nod
231 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
232 extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
233 -#define kmalloc_track_caller(size, flags) \
234 - __kmalloc_track_caller(size, flags, __builtin_return_address(0))
236 -#define kmalloc_track_caller(size, flags) \
237 +#define __kmalloc_track_caller(size, flags, ip) \
238 __kmalloc(size, flags)
239 #endif /* DEBUG_SLAB */
241 +#define kmalloc_track_caller(size, flags) \
242 + __kmalloc_track_caller(size, flags, __builtin_return_address(0))
246 * kmalloc_node_track_caller is a special version of kmalloc_node that
247 @@ -248,21 +249,22 @@ extern void *__kmalloc_track_caller(size
249 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
250 extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
251 -#define kmalloc_node_track_caller(size, flags, node) \
252 - __kmalloc_node_track_caller(size, flags, node, \
253 - __builtin_return_address(0))
255 -#define kmalloc_node_track_caller(size, flags, node) \
256 +#define __kmalloc_node_track_caller(size, flags, node, ip) \
257 __kmalloc_node(size, flags, node)
260 #else /* CONFIG_NUMA */
262 -#define kmalloc_node_track_caller(size, flags, node) \
263 - kmalloc_track_caller(size, flags)
264 +#define __kmalloc_node_track_caller(size, flags, node, ip) \
265 + __kmalloc_track_caller(size, flags, ip)
267 #endif /* DEBUG_SLAB */
269 +#define kmalloc_node_track_caller(size, flags, node) \
270 + __kmalloc_node_track_caller(size, flags, node, \
271 + __builtin_return_address(0))
278 @@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o
279 maccess.o page_alloc.o page-writeback.o pdflush.o \
280 readahead.o swap.o truncate.o vmscan.o \
281 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
282 - page_isolation.o mm_init.o $(mmu-y)
283 + page_isolation.o mm_init.o reserve.o $(mmu-y)
285 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
286 obj-$(CONFIG_BOUNCE) += bounce.o
291 + * Memory reserve management.
293 + * Copyright (C) 2007-2008, Red Hat, Inc.,
294 + * Peter Zijlstra <pzijlstr@redhat.com>
298 + * Manage a set of memory reserves.
300 + * A memory reserve is a reserve for a specified number of objects of a specified
301 + * size. Since memory is managed in pages, this reserve demand is then
302 + * translated into a page unit.
304 + * So each reserve has a specified object limit, an object usage count and a
305 + * number of pages required to back these objects.
307 + * Usage is charged against a reserve, if the charge fails, the resource must
308 + * not be allocated/used.
310 + * The reserves are managed in a tree, and the resource demands (pages and
311 + * limit) are propagated up the tree. Obviously the object limit will be
312 + * meaningless as soon as the unit starts mixing, but the required page reserve
313 + * (being of one unit) is still valid at the root.
315 + * It is the page demand of the root node that is used to set the global
316 + * reserve (adjust_memalloc_reserve() which sets zone->pages_emerg).
318 + * As long as a subtree has the same usage unit, an aggregate node can be used
319 + * to charge against, instead of the leaf nodes. However, do be consistent with
320 + * who is charged, resource usage is not propagated up the tree (for
321 + * performance reasons).
324 +#include <linux/reserve.h>
325 +#include <linux/mutex.h>
326 +#include <linux/mmzone.h>
327 +#include <linux/log2.h>
328 +#include <linux/proc_fs.h>
329 +#include <linux/seq_file.h>
330 +#include <linux/module.h>
331 +#include <linux/slab.h>
332 +#include <linux/sched.h>
333 +#include "internal.h"
335 +static DEFINE_MUTEX(mem_reserve_mutex);
338 + * @mem_reserve_root - the global reserve root
340 + * The global reserve is empty, and has no limit unit, it merely
341 + * acts as an aggregation point for reserves and an interface to
342 + * adjust_memalloc_reserve().
344 +struct mem_reserve mem_reserve_root = {
345 + .children = LIST_HEAD_INIT(mem_reserve_root.children),
346 + .siblings = LIST_HEAD_INIT(mem_reserve_root.siblings),
347 + .name = "total reserve",
348 + .lock = __SPIN_LOCK_UNLOCKED(mem_reserve_root.lock),
349 + .waitqueue = __WAIT_QUEUE_HEAD_INITIALIZER(mem_reserve_root.waitqueue),
351 +EXPORT_SYMBOL_GPL(mem_reserve_root);
354 + * mem_reserve_init() - initialize a memory reserve object
355 + * @res - the new reserve object
356 + * @name - a name for this reserve
357 + * @parent - when non NULL, the parent to connect to.
359 +void mem_reserve_init(struct mem_reserve *res, const char *name,
360 + struct mem_reserve *parent)
362 + memset(res, 0, sizeof(*res));
363 + INIT_LIST_HEAD(&res->children);
364 + INIT_LIST_HEAD(&res->siblings);
366 + spin_lock_init(&res->lock);
367 + init_waitqueue_head(&res->waitqueue);
370 + mem_reserve_connect(res, parent);
372 +EXPORT_SYMBOL_GPL(mem_reserve_init);
375 + * propagate the pages and limit changes up the (sub)tree.
377 +static void __calc_reserve(struct mem_reserve *res, long pages, long limit)
379 + unsigned long flags;
381 + for ( ; res; res = res->parent) {
382 + res->pages += pages;
385 + spin_lock_irqsave(&res->lock, flags);
386 + res->limit += limit;
387 + spin_unlock_irqrestore(&res->lock, flags);
393 + * __mem_reserve_add() - primitive to change the size of a reserve
394 + * @res - reserve to change
395 + * @pages - page delta
396 + * @limit - usage limit delta
398 + * Returns -ENOMEM when a size increase is not possible atm.
400 +static int __mem_reserve_add(struct mem_reserve *res, long pages, long limit)
406 + * This looks more complex than need be, that is because we handle
407 + * the case where @res isn't actually connected to mem_reserve_root.
409 + * So, by propagating the new pages up the (sub)tree and computing
410 + * the difference in mem_reserve_root.pages we find if this action
411 + * affects the actual reserve.
413 + * The (partial) propagation also makes that mem_reserve_connect()
414 + * needs only look at the direct child, since each disconnected
415 + * sub-tree is fully up-to-date.
417 + reserve = mem_reserve_root.pages;
418 + __calc_reserve(res, pages, 0);
419 + reserve = mem_reserve_root.pages - reserve;
422 + ret = adjust_memalloc_reserve(reserve);
424 + __calc_reserve(res, -pages, 0);
428 + * Delay updating the limits until we've acquired the resources to
432 + __calc_reserve(res, 0, limit);
438 + * __mem_reserve_charge() - primitive to charge object usage of a reserve
439 + * @res - reserve to charge
440 + * @charge - size of the charge
442 + * Returns non-zero on success, zero on failure.
445 +int __mem_reserve_charge(struct mem_reserve *res, long charge)
447 + unsigned long flags;
450 + spin_lock_irqsave(&res->lock, flags);
451 + if (charge < 0 || res->usage + charge < res->limit) {
452 + res->usage += charge;
453 + if (unlikely(res->usage < 0))
458 + wake_up_all(&res->waitqueue);
459 + spin_unlock_irqrestore(&res->lock, flags);
465 + * mem_reserve_connect() - connect a reserve to another in a child-parent relation
466 + * @new_child - the reserve node to connect (child)
467 + * @node - the reserve node to connect to (parent)
469 + * Connecting a node results in an increase of the reserve by the amount of
470 + * pages in @new_child->pages if @node has a connection to mem_reserve_root.
472 + * Returns -ENOMEM when the new connection would increase the reserve (parent
473 + * is connected to mem_reserve_root) and there is no memory to do so.
475 + * On error, the child is _NOT_ connected.
477 +int mem_reserve_connect(struct mem_reserve *new_child, struct mem_reserve *node)
481 + WARN_ON(!new_child->name);
483 + mutex_lock(&mem_reserve_mutex);
484 + if (new_child->parent) {
488 + new_child->parent = node;
489 + list_add(&new_child->siblings, &node->children);
490 + ret = __mem_reserve_add(node, new_child->pages, new_child->limit);
492 + new_child->parent = NULL;
493 + list_del_init(&new_child->siblings);
496 + mutex_unlock(&mem_reserve_mutex);
500 +EXPORT_SYMBOL_GPL(mem_reserve_connect);
503 + * mem_reserve_disconnect() - sever a nodes connection to the reserve tree
504 + * @node - the node to disconnect
506 + * Disconnecting a node results in a reduction of the reserve by @node->pages
507 + * if node had a connection to mem_reserve_root.
509 +void mem_reserve_disconnect(struct mem_reserve *node)
513 + BUG_ON(!node->parent);
515 + mutex_lock(&mem_reserve_mutex);
516 + if (!node->parent) {
520 + ret = __mem_reserve_add(node->parent, -node->pages, -node->limit);
522 + node->parent = NULL;
523 + list_del_init(&node->siblings);
526 + mutex_unlock(&mem_reserve_mutex);
529 + * We cannot fail to shrink the reserves, can we?
533 +EXPORT_SYMBOL_GPL(mem_reserve_disconnect);
535 +#ifdef CONFIG_PROC_FS
538 + * Simple output of the reserve tree in: /proc/reserve_info
541 + * localhost ~ # cat /proc/reserve_info
542 + * 1:0 "total reserve" 6232K 0/278581
543 + * 2:1 "total network reserve" 6232K 0/278581
544 + * 3:2 "network TX reserve" 212K 0/53
545 + * 4:3 "protocol TX pages" 212K 0/53
546 + * 5:2 "network RX reserve" 6020K 0/278528
547 + * 6:5 "IPv4 route cache" 5508K 0/16384
548 + * 7:5 "SKB data reserve" 512K 0/262144
549 + * 8:7 "IPv4 fragment cache" 512K 0/262144
552 +static void mem_reserve_show_item(struct seq_file *m, struct mem_reserve *res,
553 + unsigned int parent, unsigned int *id)
555 + struct mem_reserve *child;
556 + unsigned int my_id = ++*id;
558 + seq_printf(m, "%d:%d \"%s\" %ldK %ld/%ld\n",
559 + my_id, parent, res->name,
560 + res->pages << (PAGE_SHIFT - 10),
561 + res->usage, res->limit);
563 + list_for_each_entry(child, &res->children, siblings)
564 + mem_reserve_show_item(m, child, my_id, id);
567 +static int mem_reserve_show(struct seq_file *m, void *v)
569 + unsigned int ident = 0;
571 + mutex_lock(&mem_reserve_mutex);
572 + mem_reserve_show_item(m, &mem_reserve_root, ident, &ident);
573 + mutex_unlock(&mem_reserve_mutex);
578 +static int mem_reserve_open(struct inode *inode, struct file *file)
580 + return single_open(file, mem_reserve_show, NULL);
583 +static const struct file_operations mem_reserve_opterations = {
584 + .open = mem_reserve_open,
586 + .llseek = seq_lseek,
587 + .release = single_release,
590 +static __init int mem_reserve_proc_init(void)
592 + proc_create("reserve_info", S_IRUSR, NULL, &mem_reserve_opterations);
596 +module_init(mem_reserve_proc_init);
601 + * alloc_page helpers
605 + * mem_reserve_pages_set() - set reserves size in pages
606 + * @res - reserve to set
607 + * @pages - size in pages to set it to
609 + * Returns -ENOMEM when it fails to set the reserve. On failure the old size
612 +int mem_reserve_pages_set(struct mem_reserve *res, long pages)
616 + mutex_lock(&mem_reserve_mutex);
617 + pages -= res->pages;
618 + ret = __mem_reserve_add(res, pages, pages * PAGE_SIZE);
619 + mutex_unlock(&mem_reserve_mutex);
623 +EXPORT_SYMBOL_GPL(mem_reserve_pages_set);
626 + * mem_reserve_pages_add() - change the size in a relative way
627 + * @res - reserve to change
628 + * @pages - number of pages to add (or subtract when negative)
630 + * Similar to mem_reserve_pages_set, except that the argument is relative
631 + * instead of absolute.
633 + * Returns -ENOMEM when it fails to increase.
635 +int mem_reserve_pages_add(struct mem_reserve *res, long pages)
639 + mutex_lock(&mem_reserve_mutex);
640 + ret = __mem_reserve_add(res, pages, pages * PAGE_SIZE);
641 + mutex_unlock(&mem_reserve_mutex);
647 + * mem_reserve_pages_charge() - charge page usage to a reserve
648 + * @res - reserve to charge
649 + * @pages - size to charge
651 + * Returns non-zero on success.
653 +int mem_reserve_pages_charge(struct mem_reserve *res, long pages)
655 + return __mem_reserve_charge(res, pages * PAGE_SIZE);
657 +EXPORT_SYMBOL_GPL(mem_reserve_pages_charge);
664 + * mem_reserve_kmalloc_set() - set this reserve to bytes worth of kmalloc
665 + * @res - reserve to change
666 + * @bytes - size in bytes to reserve
668 + * Returns -ENOMEM on failure.
670 +int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes)
675 + mutex_lock(&mem_reserve_mutex);
676 + pages = kmalloc_estimate_bytes(GFP_ATOMIC, bytes);
677 + pages -= res->pages;
678 + bytes -= res->limit;
679 + ret = __mem_reserve_add(res, pages, bytes);
680 + mutex_unlock(&mem_reserve_mutex);
684 +EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_set);
687 + * mem_reserve_kmalloc_charge() - charge bytes to a reserve
688 + * @res - reserve to charge
689 + * @bytes - bytes to charge
691 + * Returns non-zero on success.
693 +int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes)
696 + bytes = -roundup_pow_of_two(-bytes);
698 + bytes = roundup_pow_of_two(bytes);
700 + return __mem_reserve_charge(res, bytes);
702 +EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_charge);
705 + * kmem_cache helpers
709 + * mem_reserve_kmem_cache_set() - set reserve to @objects worth of kmem_cache_alloc of @s
710 + * @res - reserve to set
711 + * @s - kmem_cache to reserve from
712 + * @objects - number of objects to reserve
714 + * Returns -ENOMEM on failure.
716 +int mem_reserve_kmem_cache_set(struct mem_reserve *res, struct kmem_cache *s,
722 + mutex_lock(&mem_reserve_mutex);
723 + pages = kmem_alloc_estimate(s, GFP_ATOMIC, objects);
724 + pages -= res->pages;
725 + bytes = objects * kmem_cache_size(s) - res->limit;
726 + ret = __mem_reserve_add(res, pages, bytes);
727 + mutex_unlock(&mem_reserve_mutex);
731 +EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_set);
734 + * mem_reserve_kmem_cache_charge() - charge (or uncharge) usage of objs
735 + * @res - reserve to charge
736 + * @objs - objects to charge for
738 + * Returns non-zero on success.
740 +int mem_reserve_kmem_cache_charge(struct mem_reserve *res, struct kmem_cache *s,
743 + return __mem_reserve_charge(res, objs * kmem_cache_size(s));
745 +EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_charge);
750 + * Actual usage is commented in linux/reserve.h where the interface functions
751 + * live. Furthermore, the code is 3 instances of the same paradigm, hence only
752 + * the first contains extensive comments.
759 +void *___kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
760 + struct mem_reserve *res, int *emerg)
766 + * Try a regular allocation, when that fails and we're not entitled
767 + * to the reserves, fail.
769 + gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
770 + obj = __kmalloc_node_track_caller(size, gfp, node, ip);
772 + if (obj || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
776 + * If we were given a reserve to charge against, try that.
778 + if (res && !mem_reserve_kmalloc_charge(res, size)) {
780 + * If we failed to charge and we're not allowed to wait for
781 + * it to succeed, bail.
783 + if (!(flags & __GFP_WAIT))
787 + * Wait for a successful charge against the reserve. All
788 + * uncharge operations against this reserve will wake us up.
790 + wait_event(res->waitqueue,
791 + mem_reserve_kmalloc_charge(res, size));
794 + * After waiting for it, again try a regular allocation.
795 + * Pressure could have lifted during our sleep. If this
796 + * succeeds, uncharge the reserve.
798 + obj = __kmalloc_node_track_caller(size, gfp, node, ip);
800 + mem_reserve_kmalloc_charge(res, -size);
806 + * Regular allocation failed, and we've successfully charged our
807 + * requested usage against the reserve. Do the emergency allocation.
809 + obj = __kmalloc_node_track_caller(size, flags, node, ip);
818 +void __kfree_reserve(void *obj, struct mem_reserve *res, int emerg)
821 + * ksize gives the full allocated size vs the requested size we used to
822 + * charge; however since we round up to the nearest power of two, this
823 + * should all work nicely.
825 + size_t size = ksize(obj);
829 + * Free before uncharge, this ensures memory is actually present when
830 + * a subsequent charge succeeds.
832 + mem_reserve_kmalloc_charge(res, -size);
836 + * kmem_cache_alloc/kmem_cache_free
839 +void *__kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
840 + struct mem_reserve *res, int *emerg)
845 + gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
846 + obj = kmem_cache_alloc_node(s, gfp, node);
848 + if (obj || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
851 + if (res && !mem_reserve_kmem_cache_charge(res, s, 1)) {
852 + if (!(flags & __GFP_WAIT))
855 + wait_event(res->waitqueue,
856 + mem_reserve_kmem_cache_charge(res, s, 1));
858 + obj = kmem_cache_alloc_node(s, gfp, node);
860 + mem_reserve_kmem_cache_charge(res, s, -1);
865 + obj = kmem_cache_alloc_node(s, flags, node);
874 +void __kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
875 + struct mem_reserve *res, int emerg)
877 + kmem_cache_free(s, obj);
878 + mem_reserve_kmem_cache_charge(res, s, -1);
882 + * alloc_pages/free_pages
885 +struct page *__alloc_pages_reserve(int node, gfp_t flags, int order,
886 + struct mem_reserve *res, int *emerg)
890 + long pages = 1 << order;
892 + gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
893 + page = alloc_pages_node(node, gfp, order);
895 + if (page || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
898 + if (res && !mem_reserve_pages_charge(res, pages)) {
899 + if (!(flags & __GFP_WAIT))
902 + wait_event(res->waitqueue,
903 + mem_reserve_pages_charge(res, pages));
905 + page = alloc_pages_node(node, gfp, order);
907 + mem_reserve_pages_charge(res, -pages);
912 + page = alloc_pages_node(node, flags, order);
921 +void __free_pages_reserve(struct page *page, int order,
922 + struct mem_reserve *res, int emerg)
924 + __free_pages(page, order);
925 + mem_reserve_pages_charge(res, -(1 << order));
929 @@ -2728,6 +2728,7 @@ void *__kmalloc(size_t size, gfp_t flags
931 EXPORT_SYMBOL(__kmalloc);
934 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
936 struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
937 @@ -2739,7 +2740,6 @@ static void *kmalloc_large_node(size_t s
942 void *__kmalloc_node(size_t size, gfp_t flags, int node)
944 struct kmem_cache *s;