2 module std.experimental.allocator.building_blocks.kernighan_ritchie;
3 import std.experimental.allocator.building_blocks.null_allocator;
6 version (unittest) import std.conv : text;
7 debug(KRRegion) import std.stdio;
11 $(D KRRegion) draws inspiration from the $(MREF_ALTTEXT region allocation
12 strategy, std,experimental,allocator,building_blocks,region) and also the
13 $(HTTP stackoverflow.com/questions/13159564/explain-this-implementation-of-malloc-from-the-kr-book,
14 famed allocator) described by Brian Kernighan and Dennis Ritchie in section 8.7
15 of the book $(HTTP amazon.com/exec/obidos/ASIN/0131103628/classicempire, "The C
16 Programming Language"), Second Edition, Prentice Hall, 1988.
18 $(H4 `KRRegion` = `Region` + Kernighan-Ritchie Allocator)
20 Initially, `KRRegion` starts in "region" mode: allocations are served from
21 the memory chunk in a region fashion. Thus, as long as there is enough memory
22 left, $(D KRRegion.allocate) has the performance profile of a region allocator.
23 Deallocation inserts (in $(BIGOH 1) time) the deallocated blocks in an
24 unstructured freelist, which is not read in region mode.
26 Once the region cannot serve an $(D allocate) request, $(D KRRegion) switches
27 to "free list" mode. It sorts the list of previously deallocated blocks by
28 address and serves allocation requests off that free list. The allocation and
29 deallocation follow the pattern described by Kernighan and Ritchie.
31 The recommended use of `KRRegion` is as a $(I region with deallocation). If the
32 `KRRegion` is dimensioned appropriately, it may never enter free list
33 mode during its lifetime. Thus it is as fast as a simple region, whilst
34 offering deallocation at a small cost. When the region memory is exhausted,
35 the previously deallocated memory is still usable, at a performance cost. If
36 the region is not excessively large and fragmented, the linear allocation and
37 deallocation cost may still be compensated for by the good locality
40 If the chunk of memory managed is large, it may be desirable to switch
41 management to free list from the beginning. That way, memory may be used in a
42 more compact manner than in region mode. To force free list mode, call $(D
43 switchToFreeList) shortly after construction or when deemed appropriate.
45 The smallest size that can be allocated is two words (16 bytes on 64-bit
46 systems, 8 bytes on 32-bit systems). This is because the free list management
47 needs two words (one for the length, the other for the next pointer in the
50 The $(D ParentAllocator) type parameter is the type of the allocator used to
51 allocate the memory chunk underlying the $(D KRRegion) object. Choosing the
52 default ($(D NullAllocator)) means the user is responsible for passing a buffer
53 at construction (and for deallocating it if necessary). Otherwise, $(D KRRegion)
54 automatically deallocates the buffer during destruction. For that reason, if
55 $(D ParentAllocator) is not $(D NullAllocator), then $(D KRRegion) is not
58 $(H4 Implementation Details)
60 In free list mode, $(D KRRegion) embeds a free blocks list onto the chunk of
61 memory. The free list is circular, coalesced, and sorted by address at all
62 times. Allocations and deallocations take time proportional to the number of
63 previously deallocated blocks. (In practice the cost may be lower, e.g. if
64 memory is deallocated in reverse order of allocation, all operations take
65 constant time.) Memory utilization is good (small control structure and no
66 per-allocation overhead). The disadvantages of freelist mode include proneness
67 to fragmentation, a minimum allocation size of two words, and linear worst-case
68 allocation and deallocation times.
70 Similarities of `KRRegion` (in free list mode) with the
71 Kernighan-Ritchie allocator:
74 $(LI Free blocks have variable size and are linked in a singly-linked list.)
75 $(LI The freelist is maintained in increasing address order, which makes
77 $(LI The strategy for finding the next available block is first fit.)
78 $(LI The free list is circular, with the last node pointing back to the first.)
79 $(LI Coalescing is carried out during deallocation.)
82 Differences from the Kernighan-Ritchie allocator:
85 $(LI Once the chunk is exhausted, the Kernighan-Ritchie allocator allocates
86 another chunk using operating system primitives. For better composability, $(D
87 KRRegion) just gets full (returns $(D null) on new allocation requests). The
88 decision to allocate more blocks is deferred to a higher-level entity. For an
89 example, see the example below using $(D AllocatorList) in conjunction with $(D
91 $(LI Allocated blocks do not hold a size prefix. This is because in D the size
92 information is available in client code at deallocation time.)
96 struct KRRegion(ParentAllocator = NullAllocator)
98 import std.experimental.allocator.common : stateSize, alignedAt;
99 import std.traits : hasMember;
100 import std.typecons : Ternary;
102 private static struct Node
104 import std.typecons : tuple, Tuple;
// Free-list node stored in-place at the start of each free block.
// NOTE(review): this view is a gapped extraction (embedded line numbers
// jump); the declarations of the `size` and `next` members used below,
// plus several braces/guards, are not visible here -- confirm against
// the complete file before relying on these notes.
// Returns the memory governed by this node: starts at the node itself
// and spans `size` bytes.
111 void[] payload() inout
113 return (cast(ubyte*) &this)[0 .. size];
// True when `right` starts inside this node's payload or within one
// Node.sizeof of slack past its end -- i.e. the two can be coalesced.
// NOTE(review): `p` is presumably `payload` bound in a missing line.
116 bool adjacent(in Node* right) const
120 return p.ptr < right && right < p.ptr + p.length + Node.sizeof;
// Merges this node with its successor when adjacent; when `memoryEnd`
// is supplied, also absorbs a trailing sub-Node.sizeof gap before the
// end of the managed chunk. Returns false when nothing was merged
// (missing return path not visible in this view).
123 bool coalesce(void* memoryEnd = null)
125 // Coalesce the last node before the memory end with any possible gap
127 && memoryEnd < payload.ptr + payload.length + Node.sizeof)
129 size += memoryEnd - (payload.ptr + payload.length);
133 if (!adjacent(next)) return false;
134 size = (cast(ubyte*) next + next.size) - cast(ubyte*) &this;
// Carves `bytes` (already rounded to Node granularity by the caller)
// off this free node. Returns (payload slice, node to link in place of
// this one), or an empty tuple when the node is too small.
139 Tuple!(void[], Node*) allocateHere(size_t bytes)
141 assert(bytes >= Node.sizeof);
142 assert(bytes % Node.alignof == 0);
144 assert(!adjacent(next));
145 if (size < bytes) return typeof(return)();
146 assert(size >= bytes);
147 immutable leftover = size - bytes;
149 if (leftover >= Node.sizeof)
151 // There's room for another node: split, and keep the remainder
// on the free list. A self-referencing `next` means this was the
// only node, so the new remainder points at itself.
152 auto newNode = cast(Node*) ((cast(ubyte*) &this) + bytes);
153 newNode.size = leftover;
154 newNode.next = next == &this ? newNode : next;
156 return tuple(payload, newNode);
159 // No slack space, just return next node (null when this node was
// the only one in the circular list).
160 return tuple(payload, next == &this ? null : next);
166 If $(D ParentAllocator) holds state, $(D parent) is a public member of type
167 $(D KRRegion). Otherwise, $(D parent) is an $(D alias) for
168 `ParentAllocator.instance`.
170 static if (stateSize!ParentAllocator) ParentAllocator parent;
171 else alias parent = ParentAllocator.instance;
172 private void[] payload;
174 private bool regionMode = true;
// Forward range walking the circular free list exactly once: `current`
// becomes null when the walk returns to `start`. NOTE(review): the Range
// struct header, popFront signature, and byNodePtr signature are missing
// from this gapped view.
180 Node* start, current;
181 @property bool empty() { return !current; }
182 @property Node* front() { return current; }
185 assert(current && current.next);
186 current = current.next;
// Wrapped around to the starting node: the traversal is finished.
187 if (current == start) current = null;
189 @property Range save() { return this; }
191 import std.range : isForwardRange;
192 static assert(isForwardRange!Range);
// Both anchor and cursor begin at `root`.
193 return Range(root, root);
// Debug rendering: header with object address span, payload extent, and
// current mode, followed by one entry per free node. A "+" prefix marks a
// node adjacent to (coalescible with) the previous one.
198 import std.format : format;
199 string s = "KRRegion@";
200 s ~= format("%s-%s(0x%s[%s] %s", &this, &this + 1,
201 payload.ptr, payload.length,
202 regionMode ? "(region)" : "(freelist)");
204 Node* lastNode = null;
// NOTE(review): the guard choosing between these two loops is missing
// from this view; presumably byNodePtr is used when the list is circular
// and the raw pointer walk when it is still null-terminated -- confirm.
207 foreach (node; byNodePtr)
209 s ~= format(", %sfree(0x%s[%s])",
210 lastNode && lastNode.adjacent(node) ? "+" : "",
211 cast(void*) node, node.size);
217 for (auto node = root; node; node = node.next)
219 s ~= format(", %sfree(0x%s[%s])",
220 lastNode && lastNode.adjacent(node) ? "+" : "",
221 cast(void*) node, node.size);
// Debug-only invariant check (enabled via debug(KRRegion)): the root must
// lie inside the payload, no two neighboring free nodes may be coalescible
// (deallocation is expected to have merged them), and the walk must stop
// within payload.length / Node.sizeof steps -- i.e. the list terminates.
// `s` is the caller's name, used as the assertion message.
230 private void assertValid(string s)
242 assert(root >= payload.ptr, s);
243 assert(root < payload.ptr + payload.length, s);
245 // Check that the list terminates
// NOTE(review): the declaration of counter `n` is missing from this view.
247 foreach (node; byNodePtr)
250 assert(!node.adjacent(node.next));
251 assert(n++ < payload.length / Node.sizeof, s);
// Natural merge sort over the null-terminated free list: scan for the
// longest already-ascending run starting at `root`, detach the remainder,
// sort it recursively, then merge the two sorted lists. Used when
// switching from region mode to free-list mode.
255 private Node* sortFreelist(Node* root)
257 // Find a monotonic run
// NOTE(review): the loop header and the statement severing the run from
// `tail` are missing from this gapped view.
261 if (!last.next) return root;
262 if (last > last.next) break;
263 assert(last < last.next);
266 auto tail = last.next;
268 tail = sortFreelist(tail);
269 return merge(root, tail);
// Classic recursive merge of two address-sorted, null-terminated node
// lists; returns the head of the combined sorted list. NOTE(review): the
// comparison selecting `result` and its return statements are missing
// from this gapped view.
272 private Node* merge(Node* left, Node* right)
274 assert(left != right);
275 if (!left) return right;
276 if (!right) return left;
280 result.next = merge(left.next, right);
284 result.next = merge(left, right.next);
// One pass over the sorted, null-terminated free list: repeatedly coalesce
// each node with its successor, then close the list into a circle (last
// node's next pointing back at root). Called right after sortFreelist.
288 private void coalesceAndMakeCircular()
290 for (auto n = root;;)
// Sorted-order invariant from sortFreelist.
292 assert(!n.next || n < n.next);
295 // Convert to circular
// A successful coalesce may expose yet another adjacent successor,
// so retry the same node before advancing.
299 if (n.coalesce) continue; // possibly another coalesce
305 Create a $(D KRRegion). If $(D ParentAllocator) is not $(D NullAllocator),
306 $(D KRRegion)'s destructor will call $(D parent.deallocate).
309 b = Block of memory to serve as support for the allocator. Memory must be
310 larger than two words and word-aligned.
311 n = Capacity desired. This constructor is defined only if $(D
312 ParentAllocator) is not $(D NullAllocator).
// Constructor over a caller-supplied buffer: a buffer smaller than one
// Node leaves the allocator empty; otherwise the whole buffer becomes a
// single free node rooted at its start. NOTE(review): braces, the
// assignments to `payload`, and `root.next` initialization are missing
// from this gapped view.
316 if (b.length < Node.sizeof)
319 assert(root is null);
320 assert(payload is null);
323 assert(b.length >= Node.sizeof);
324 assert(b.ptr.alignedAt(Node.alignof));
325 assert(b.length >= 2 * Node.sizeof);
327 root = cast(Node*) b.ptr;
328 // Initialize the free list with the entire buffer as one free node
331 root.size = b.length;
332 debug(KRRegion) writefln("KRRegion@%s: init with %s[%s]", &this,
// Capacity constructor: only available when a real ParentAllocator is
// configured; obtains the buffer from the parent and delegates to the
// buffer constructor above.
337 static if (!is(ParentAllocator == NullAllocator))
340 assert(n > Node.sizeof);
341 this(cast(ubyte[])(parent.allocate(n)));
// Destructor: return the chunk to the parent, but only when a parent
// exists and exposes deallocate.
345 static if (!is(ParentAllocator == NullAllocator)
346 && hasMember!(ParentAllocator, "deallocate"))
349 parent.deallocate(payload);
353 Forces free list mode. If already in free list mode, does nothing.
354 Otherwise, sorts the free list accumulated so far and switches strategy for
355 future allocations to KR style.
// Forces Kernighan-Ritchie free-list mode: no-op if already there,
// otherwise sorts the blocks deallocated so far by address, coalesces
// neighbors, and closes the list into a circle. NOTE(review): the line
// clearing `regionMode` is missing from this gapped view.
357 void switchToFreeList()
359 if (!regionMode) return;
362 root = sortFreelist(root);
363 coalesceAndMakeCircular;
372 Word-level alignment.
374 enum alignment = Node.alignof;
377 Allocates $(D n) bytes. Allocation searches the list of available blocks
378 until a free block with $(D n) or more bytes is found (first fit strategy).
379 The block is split (if larger) and returned.
381 Params: n = number of bytes to _allocate
383 Returns: A word-aligned buffer of $(D n) bytes, or $(D null).
// Allocates n bytes rounded up via goodAllocSize. In region mode only the
// head free block is consulted (O(1) bump allocation); on failure the
// allocator switches to free-list mode and falls through to a first-fit
// scan of the circular list. Returns null for n == 0 or when exhausted.
// NOTE(review): several lines (the regionMode guard, `result` binding,
// root reassignment, loop advance) are missing from this gapped view.
385 void[] allocate(size_t n)
387 if (!n || !root) return null;
388 const actualBytes = goodAllocSize(n);
390 // Try the region first
393 // Only look at the head of the freelist
394 if (root.size >= actualBytes)
396 // Enough room for allocation
398 immutable balance = root.size - actualBytes;
399 if (balance >= Node.sizeof)
// Split: the remainder becomes the new head node.
401 auto newRoot = cast(Node*) (result + actualBytes);
402 newRoot.next = root.next;
403 newRoot.size = balance;
// Hand back exactly n bytes of the (possibly larger) carved block.
411 return result[0 .. n];
414 // Not enough memory, switch to freelist mode and fall through
418 // Try to allocate from next after the iterating node
419 for (auto pnode = root;;)
421 assert(!pnode.adjacent(pnode.next));
422 auto k = pnode.next.allocateHere(actualBytes);
426 assert(k[0].length >= n);
// If the carved node was the root, the replacement node (k[1])
// becomes the new root.
427 if (root == pnode.next) root = k[1];
// Full circle without a fit: allocation fails.
433 if (pnode == root) break;
439 Deallocates $(D b), which is assumed to have been previously allocated with
440 this allocator. Deallocation performs a linear search in the free list to
441 preserve its sorting order. It follows that blocks with higher addresses in
442 allocators with many free blocks are slower to deallocate.
444 Params: b = block to be deallocated
// Returns block b to the free list, keeping the list sorted by address
// via a linear scan; coalescing with the chunk's trailing gap is handled
// here, other coalescing is deferred to allocation time. Asserts b was
// allocated by this allocator. NOTE(review): many interior lines (the
// empty-root fast path, the actual link-in statements, returns) are
// missing from this gapped view.
446 bool deallocate(void[] b)
448 debug(KRRegion) writefln("KRRegion@%s: deallocate(%s[%s])", &this,
// Freeing null is a successful no-op, per allocator convention.
450 if (!b.ptr) return true;
451 assert(owns(b) == Ternary.yes);
452 assert(b.ptr.alignedAt(Node.alignof));
454 // Insert back in the freelist, keeping it sorted by address. Do not
455 // coalesce at this time. Instead, do it lazily during allocation.
// Reinterpret the block as a Node; its size is the rounded-up size
// originally carved for it, not the caller-visible length.
456 auto n = cast(Node*) b.ptr;
457 n.size = goodAllocSize(b.length);
458 auto memoryEnd = payload.ptr + payload.length;
463 // Insert right after root
471 // What a sight for sore eyes
475 // If the first block freed is the last one allocated,
476 // maybe there's a gap after it.
477 root.coalesce(memoryEnd);
481 version (assert) foreach (test; byNodePtr)
// Linear scan for the sorted insertion point.
489 assert(pnode && pnode.next);
// Double-free would make n already reachable here.
491 assert(pnode.next != n);
492 if (pnode < pnode.next)
494 if (pnode >= n || n >= pnode.next) continue;
495 // Insert in between pnode and pnode.next
505 // Insert at the end of the list
506 // Add any possible gap at the end of n to the length of n
509 n.coalesce(memoryEnd);
514 else if (n < pnode.next)
516 // Insert at the front of the list
524 while ((pnode = pnode.next) != root);
// Reaching here means b did not fit anywhere in the sorted list.
525 assert(0, "Wrong parameter passed to deallocate");
529 Allocates all memory available to this allocator. If the allocator is empty,
530 returns the entire available block of memory. Otherwise, it still performs
531 a best-effort allocation: if there is no fragmentation (e.g. $(D allocate)
532 has been used but not $(D deallocate)), allocates and returns the only
533 available block of memory.
535 The operation takes time proportional to the number of adjacent free blocks
536 at the front of the free list. These blocks get coalesced, whether
537 $(D allocateAll) succeeds or fails due to fragmentation.
// allocateAll body fragment: force free-list mode (which sorts and
// coalesces), then succeed only when exactly one free node remains --
// i.e. the free memory is contiguous. NOTE(review): the signature and the
// failure-path return are missing from this gapped view.
541 if (regionMode) switchToFreeList;
542 if (root && root.next == root)
543 return allocate(root.size);
550 import std.experimental.allocator.gc_allocator : GCAllocator;
551 auto alloc = KRRegion!GCAllocator(1024 * 64);
552 const b1 = alloc.allocate(2048);
553 assert(b1.length == 2048);
554 const b2 = alloc.allocateAll;
555 assert(b2.length == 1024 * 62);
559 Deallocates all memory currently allocated, making the allocator ready for
560 other allocations. This is a $(BIGOH 1) operation.
// deallocateAll body fragment: O(1) reset -- rebuild the free list as one
// node covering the entire payload. NOTE(review): the signature,
// `regionMode` reset, and `root.next` initialization are missing from
// this gapped view.
564 debug(KRRegion) assertValid("deallocateAll");
565 debug(KRRegion) scope(exit) assertValid("deallocateAll");
566 root = cast(Node*) payload.ptr;
567 // Initialize the free list with the entire payload as one free node
571 root.size = payload.length;
577 Checks whether the allocator is responsible for the allocation of $(D b).
578 It does a simple $(BIGOH 1) range check. $(D b) should be a buffer either
579 allocated with $(D this) or obtained through other means.
// O(1) ownership test: b belongs to this allocator iff its pointer lies
// within the managed payload. Never returns Ternary.unknown.
581 Ternary owns(void[] b)
583 debug(KRRegion) assertValid("owns");
584 debug(KRRegion) scope(exit) assertValid("owns");
585 return Ternary(b.ptr >= payload.ptr
586 && b.ptr < payload.ptr + payload.length);
590 Adjusts $(D n) to a size suitable for allocation (two words or larger,
// Rounds a request up to an allocatable size: at least Node.sizeof (two
// words -- a free block must be able to hold its own list node), otherwise
// the next multiple of the allocator's alignment.
593 static size_t goodAllocSize(size_t n)
595 import std.experimental.allocator.common : roundUpToMultipleOf;
596 return n <= Node.sizeof
597 ? Node.sizeof : n.roundUpToMultipleOf(alignment);
601 Returns: `Ternary.yes` if the allocator is empty, `Ternary.no` otherwise.
602 Never returns `Ternary.unknown`.
606 return Ternary(root && root.size == payload.length);
611 $(D KRRegion) is preferable to $(D Region) as a front for a general-purpose
612 allocator if $(D deallocate) is needed, yet the actual deallocation traffic is
613 relatively low. The example below shows a $(D KRRegion) using stack storage
614 fronting the GC allocator.
618 import std.experimental.allocator.building_blocks.fallback_allocator
620 import std.experimental.allocator.gc_allocator : GCAllocator;
621 import std.typecons : Ternary;
622 // KRRegion fronting a general-purpose allocator
623 ubyte[1024 * 128] buf;
624 auto alloc = fallbackAllocator(KRRegion!()(buf), GCAllocator.instance);
625 auto b = alloc.allocate(100);
626 assert(b.length == 100);
627 assert(alloc.primary.owns(b) == Ternary.yes);
631 The code below defines a scalable allocator consisting of 1 MB (or larger)
632 blocks fetched from the garbage-collected heap. Each block is organized as a
633 KR-style heap. More blocks are allocated and freed on a need basis.
635 This is the closest example to the allocator introduced in the K$(AMP)R book.
636 It should perform slightly better because instead of searching through one
637 large free list, it searches through several shorter lists in LRU order. Also,
638 it actually returns memory to the operating system when possible.
642 import std.algorithm.comparison : max;
643 import std.experimental.allocator.building_blocks.allocator_list
645 import std.experimental.allocator.gc_allocator : GCAllocator;
646 import std.experimental.allocator.mmap_allocator : MmapAllocator;
647 AllocatorList!(n => KRRegion!MmapAllocator(max(n * 16, 1024 * 1024))) alloc;
652 import std.algorithm.comparison : max;
653 import std.experimental.allocator.building_blocks.allocator_list
655 import std.experimental.allocator.gc_allocator : GCAllocator;
656 import std.experimental.allocator.mallocator : Mallocator;
657 import std.typecons : Ternary;
659 Create a scalable allocator consisting of 1 MB (or larger) blocks fetched
660 from the garbage-collected heap. Each block is organized as a KR-style
661 heap. More blocks are allocated and freed on a need basis.
663 AllocatorList!(n => KRRegion!Mallocator(max(n * 16, 1024 * 1024)),
664 NullAllocator) alloc;
666 foreach (i; 0 .. array.length)
668 auto length = i * 10_000 + 1;
669 array[i] = alloc.allocate(length);
670 assert(array[i].ptr);
671 assert(array[i].length == length);
673 import std.random : randomShuffle;
674 randomShuffle(array[]);
675 foreach (i; 0 .. array.length)
677 assert(array[i].ptr);
678 assert(alloc.owns(array[i]) == Ternary.yes);
679 alloc.deallocate(array[i]);
685 import std.algorithm.comparison : max;
686 import std.experimental.allocator.building_blocks.allocator_list
688 import std.experimental.allocator.gc_allocator : GCAllocator;
689 import std.experimental.allocator.mmap_allocator : MmapAllocator;
690 import std.typecons : Ternary;
692 Create a scalable allocator consisting of 1 MB (or larger) blocks fetched
693 from the garbage-collected heap. Each block is organized as a KR-style
694 heap. More blocks are allocated and freed on a need basis.
697 auto result = KRRegion!MmapAllocator(max(n * 2, 1024 * 1024));
701 foreach (i; 0 .. array.length)
703 auto length = i * 10_000 + 1;
704 array[i] = alloc.allocate(length);
705 assert(array[i].ptr);
708 assert(array[i].ptr != array[j].ptr);
710 assert(array[i].length == length);
712 import std.random : randomShuffle;
713 randomShuffle(array[]);
714 foreach (i; 0 .. array.length)
716 assert(alloc.owns(array[i]) == Ternary.yes);
717 alloc.deallocate(array[i]);
723 import std.algorithm.comparison : max;
724 import std.experimental.allocator.building_blocks.allocator_list
726 import std.experimental.allocator.common : testAllocator;
727 import std.experimental.allocator.gc_allocator : GCAllocator;
728 testAllocator!(() => AllocatorList!(
729 n => KRRegion!GCAllocator(max(n * 16, 1024 * 1024)))());
734 import std.experimental.allocator.gc_allocator : GCAllocator;
736 auto alloc = KRRegion!GCAllocator(1024 * 1024);
741 array ~= alloc.allocate(i);
742 assert(array[$ - 1].length == i);
744 alloc.deallocate(array[1]);
745 alloc.deallocate(array[0]);
746 alloc.deallocate(array[2]);
747 assert(alloc.allocateAll().length == 1024 * 1024);
752 import std.experimental.allocator.gc_allocator : GCAllocator;
753 import std.typecons : Ternary;
754 auto alloc = KRRegion!()(
755 cast(ubyte[])(GCAllocator.instance.allocate(1024 * 1024)));
756 const store = alloc.allocate(KRRegion!().sizeof);
757 auto p = cast(KRRegion!()* ) store.ptr;
758 import core.stdc.string : memcpy;
759 import std.algorithm.mutation : move;
760 import std.conv : emplace;
762 memcpy(p, &alloc, alloc.sizeof);
766 foreach (i; 0 .. array.length)
768 auto length = 100 * i + 1;
769 array[i] = p.allocate(length);
770 assert(array[i].length == length, text(array[i].length));
771 assert(p.owns(array[i]) == Ternary.yes);
773 import std.random : randomShuffle;
774 randomShuffle(array[]);
775 foreach (i; 0 .. array.length)
777 assert(p.owns(array[i]) == Ternary.yes);
778 p.deallocate(array[i]);
780 auto b = p.allocateAll();
781 assert(b.length == 1024 * 1024 - KRRegion!().sizeof, text(b.length));
786 import std.experimental.allocator.gc_allocator : GCAllocator;
787 auto alloc = KRRegion!()(
788 cast(ubyte[])(GCAllocator.instance.allocate(1024 * 1024)));
789 auto p = alloc.allocateAll();
790 assert(p.length == 1024 * 1024);
791 alloc.deallocateAll();
792 p = alloc.allocateAll();
793 assert(p.length == 1024 * 1024);
798 import std.experimental.allocator.building_blocks;
800 import std.typecons : Ternary;
802 // Both sequences must work on either system
804 // A sequence of allocs which generates the error described in issue 16564
805 // that is a gap at the end of buf from the perspective of the allocator
807 // for 64 bit systems (leftover balance = 8 bytes < 16)
808 int[] sizes64 = [18904, 2008, 74904, 224, 111904, 1904, 52288, 8];
810 // for 32 bit systems (leftover balance < 8)
811 int[] sizes32 = [81412, 107068, 49892, 23768];
814 void test(int[] sizes)
816 align(size_t.sizeof) ubyte[256 * 1024] buf;
817 auto a = KRRegion!()(buf);
821 foreach (size; sizes)
823 bufs ~= a.allocate(size);
826 foreach (b; bufs.randomCover)
831 assert(a.empty == Ternary.yes);
840 import std.experimental.allocator.building_blocks;
842 import std.typecons : Ternary;
844 // For 64 bits, we allocate in multiples of 8, but the minimum alloc size is 16.
845 // This can create gaps.
846 // This test is an example of such a case. The gap is formed between the block
847 // allocated for the second value in sizes and the third. There is also a gap
848 // at the very end. (total lost 2 * word)
850 int[] sizes64 = [2008, 18904, 74904, 224, 111904, 1904, 52288, 8];
851 int[] sizes32 = [81412, 107068, 49892, 23768];
856 void test(int[] sizes, int word)
858 align(size_t.sizeof) ubyte[256 * 1024] buf;
859 auto a = KRRegion!()(buf);
863 foreach (size; sizes)
865 bufs ~= a.allocate(size);
868 a.deallocate(bufs[1]);
869 bufs ~= a.allocate(sizes[1] - word);
871 a.deallocate(bufs[0]);
872 foreach (i; 2 .. bufs.length)
874 a.deallocate(bufs[i]);
877 assert(a.empty == Ternary.yes);
880 test(sizes64, word64);
881 test(sizes32, word32);