]>
Commit | Line | Data |
---|---|---|
be17aa82 | 1 | /* |
b8ae064d | 2 | * Copyright (C) 1996-2023 The Squid Software Foundation and contributors |
be17aa82 | 3 | * |
bbc27441 AJ |
4 | * Squid software is distributed under GPLv2+ license and includes |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
be17aa82 AR |
7 | */ |
8 | ||
bbc27441 AJ |
9 | /* DEBUG: section 54 Interprocess Communication */ |
10 | ||
f7f3304a | 11 | #include "squid.h" |
be17aa82 | 12 | |
675b8408 | 13 | #include "debug/Stream.h" |
68353d5a | 14 | #include "ipc/mem/Page.h" |
be17aa82 AR |
15 | #include "ipc/mem/PageStack.h" |
16 | ||
a5860854 AR |
17 | #include <cmath> |
18 | #include <algorithm> | |
1f8b5f0e | 19 | #include <limits> |
c3408a33 | 20 | |
a5860854 AR |
21 | /* |
22 | ||
23 | Ipc::Mem::IdSet and related code maintains a perfect full binary tree structure: | |
24 | ||
25 | (l,r) | |
26 | /\ | |
27 | (ll,lr) (rl,rr) | |
28 | /\ /\ | |
29 | L1 L2 L3 L4 | |
30 | ||
31 | where | |
32 | ||
33 | * (l,r) is an always-present root node; | |
34 | * inner nodes, including the root one, count the total number of available | |
35 | IDs in the leaf nodes of the left and right subtrees (e.g., r = rl + rr); | |
36 | * leaf nodes are bitsets of available IDs (e.g., rl = number of 1s in L3); | |
37 | all leaf nodes are always present. | |
38 | ||
39 | The above sample tree would be stored as seven 64-bit atomic integers: | |
40 | (l,r), (ll,lr), (rl,rr), L1, L2, L3, L4 | |
41 | ||
42 | */ | |
43 | ||
44 | namespace Ipc | |
45 | { | |
46 | ||
47 | namespace Mem | |
48 | { | |
49 | ||
50 | /// the maximum number of pages that a leaf node can store | |
51 | static const IdSet::size_type BitsPerLeaf = 64; | |
52 | ||
53 | class IdSetPosition | |
54 | { | |
55 | public: | |
56 | using size_type = IdSet::size_type; | |
57 | ||
58 | IdSetPosition() = default; ///< root node position | |
59 | IdSetPosition(size_type aLevel, size_type anOffset); | |
60 | ||
61 | /// whether we are at the top of the tree | |
62 | bool atRoot() const { return !level && !offset; } | |
63 | ||
64 | /// which direction is this position from our parent node | |
65 | IdSetNavigationDirection ascendDirection() const; | |
66 | ||
67 | /// the number of levels above us (e.g., zero for the root node) | |
68 | IdSet::size_type level = 0; | |
69 | /// the number of nodes (at our level) to the left of us | |
70 | IdSet::size_type offset = 0; | |
71 | }; | |
72 | ||
73 | /// a helper class to perform inner node manipulation for IdSet | |
74 | class IdSetInnerNode | |
75 | { | |
76 | public: | |
77 | using size_type = IdSet::size_type; | |
78 | typedef uint64_t Packed; ///< (atomically) stored serialized value | |
79 | ||
80 | /// de-serializes a given value | |
81 | static IdSetInnerNode Unpack(Packed packed); | |
82 | ||
83 | IdSetInnerNode() = default; | |
84 | IdSetInnerNode(size_type left, size_type right); | |
85 | ||
86 | /// returns a serializes value suitable for shared memory storage | |
87 | Packed pack() const { return (static_cast<Packed>(left) << 32) | right; } | |
88 | ||
89 | size_type left = 0; ///< the number of available IDs in the left subtree | |
90 | size_type right = 0; ///< the number of available IDs in the right subtree | |
91 | }; | |
92 | ||
93 | } // namespace Mem | |
94 | ||
95 | } // namespace Ipc | |
96 | ||
97 | /* Ipc::Mem::IdSetPosition */ | |
98 | ||
99 | Ipc::Mem::IdSetPosition::IdSetPosition(size_type aLevel, size_type anOffset): | |
100 | level(aLevel), | |
101 | offset(anOffset) | |
102 | { | |
103 | } | |
104 | ||
105 | Ipc::Mem::IdSetNavigationDirection | |
106 | Ipc::Mem::IdSetPosition::ascendDirection() const | |
107 | { | |
108 | return (offset % 2 == 0) ? dirLeft : dirRight; | |
109 | } | |
110 | ||
111 | /* Ipc::Mem::IdSetMeasurements */ | |
112 | ||
113 | Ipc::Mem::IdSetMeasurements::IdSetMeasurements(const size_type aCapacity) | |
114 | { | |
115 | capacity = aCapacity; | |
116 | ||
117 | // For simplicity, we want a perfect full binary tree with root and leaves. | |
118 | // We could compute all this with log2() calls, but rounding and honoring | |
119 | // root+leaves minimums make that approach more complex than this fast loop. | |
120 | requestedLeafNodeCount = (capacity + (BitsPerLeaf-1))/BitsPerLeaf; | |
121 | treeHeight = 1+1; // the root level plus the leaf nodes level | |
122 | leafNodeCount = 2; // the root node can have only two leaf nodes | |
123 | while (leafNodeCount < requestedLeafNodeCount) { | |
124 | leafNodeCount *= 2; | |
125 | ++treeHeight; | |
126 | } | |
127 | innerLevelCount = treeHeight - 1; | |
128 | ||
129 | debugs(54, 5, "rounded capacity up from " << capacity << " to " << (leafNodeCount*BitsPerLeaf)); | |
130 | ||
131 | // we do (1 << level) when computing 32-bit IdSetInnerNode::left | |
132 | assert(treeHeight < 32); | |
133 | } | |
134 | ||
135 | /* Ipc::Mem::IdSetInnerNode */ | |
136 | ||
137 | Ipc::Mem::IdSetInnerNode::IdSetInnerNode(size_type aLeft, size_type aRight): | |
138 | left(aLeft), | |
139 | right(aRight) | |
140 | { | |
141 | } | |
142 | ||
143 | Ipc::Mem::IdSetInnerNode | |
144 | Ipc::Mem::IdSetInnerNode::Unpack(Packed packed) | |
145 | { | |
146 | // truncation during the cast is intentional here | |
147 | return IdSetInnerNode(packed >> 32, static_cast<uint32_t>(packed)); | |
148 | } | |
149 | ||
150 | /* Ipc::Mem::IdSet */ | |
151 | ||
152 | Ipc::Mem::IdSet::IdSet(const size_type capacity): | |
153 | measurements(capacity), | |
154 | nodes_(capacity) | |
155 | { | |
156 | // For valueAddress() to be able to return a raw uint64_t pointer, the | |
157 | // atomic wrappers in nodes_ must be zero-size. Check the best we can. Once. | |
158 | static_assert(sizeof(StoredNode) == sizeof(Node), "atomic locks use no storage"); | |
159 | assert(StoredNode().is_lock_free()); | |
a5860854 AR |
160 | } |
161 | ||
162 | void | |
163 | Ipc::Mem::IdSet::makeFullBeforeSharing() | |
164 | { | |
165 | // initially, all IDs are marked as available | |
166 | fillAllNodes(); | |
167 | ||
168 | // ... but IDs beyond the requested capacity should not be available | |
169 | if (measurements.capacity != measurements.leafNodeCount*BitsPerLeaf) | |
170 | truncateExtras(); | |
171 | } | |
172 | ||
173 | /// populates the entire allocated tree with available IDs | |
174 | /// may exceed the requested capacity; \see truncateExtras() | |
175 | void | |
176 | Ipc::Mem::IdSet::fillAllNodes() | |
177 | { | |
178 | // leaf nodes | |
179 | auto pos = Position(measurements.treeHeight-1, 0); | |
180 | const auto allOnes = ~uint64_t(0); | |
181 | std::fill_n(valueAddress(pos), measurements.leafNodeCount, allOnes); | |
182 | ||
183 | // inner nodes, starting from the bottom of the tree | |
184 | auto nodesAtLevel = measurements.leafNodeCount/2; | |
185 | auto pagesBelow = BitsPerLeaf; | |
186 | do { | |
187 | pos = ascend(pos); | |
188 | const auto value = IdSetInnerNode(pagesBelow, pagesBelow).pack(); | |
189 | std::fill_n(valueAddress(pos), nodesAtLevel, value); | |
190 | nodesAtLevel /= 2; | |
191 | pagesBelow *= 2; | |
192 | } while (!pos.atRoot()); | |
193 | } | |
194 | ||
195 | /// effectively removes IDs that exceed the requested capacity after makeFull() | |
196 | void | |
197 | Ipc::Mem::IdSet::truncateExtras() | |
198 | { | |
199 | // leaf nodes | |
200 | // start with the left-most leaf that should have some 0s; it may even have | |
201 | // no 1s at all (i.e. be completely unused) | |
202 | auto pos = Position(measurements.treeHeight-1, measurements.capacity/BitsPerLeaf); | |
203 | leafTruncate(pos, measurements.capacity % BitsPerLeaf); | |
204 | const auto rightLeaves = measurements.leafNodeCount - measurements.requestedLeafNodeCount; | |
205 | // this zeroing of the leaf nodes to the right from pos is only necessary to | |
206 | // trigger asserts if the code dealing with the inner node counters is buggy | |
207 | if (rightLeaves > 1) | |
208 | std::fill_n(valueAddress(pos) + 1, rightLeaves-1, 0); | |
c3408a33 | 209 | |
a5860854 AR |
210 | // inner nodes, starting from the bottom of the tree; optimization: only |
211 | // adjusting nodes on the way up from the first leaf-with-0s position | |
212 | auto toSubtract = BitsPerLeaf - (measurements.capacity % BitsPerLeaf); | |
213 | do { | |
214 | const auto direction = pos.ascendDirection(); | |
215 | pos = ascend(pos); | |
216 | toSubtract = innerTruncate(pos, direction, toSubtract); | |
217 | } while (!pos.atRoot()); | |
218 | } | |
219 | ||
220 | /// fill the leaf node at a given position with 0s, leaving only idsToKeep IDs | |
c3408a33 | 221 | void |
a5860854 | 222 | Ipc::Mem::IdSet::leafTruncate(const Position pos, const size_type idsToKeep) |
c3408a33 | 223 | { |
a5860854 AR |
224 | Node &node = *valueAddress(pos); // no auto to simplify the asserts() below |
225 | assert(node == std::numeric_limits<Node>::max()); // all 1s | |
226 | static_assert(std::is_unsigned<Node>::value, "right shift prepends 0s"); | |
227 | node >>= BitsPerLeaf - idsToKeep; | |
228 | // node can be anything here, including all 0s and all 1s | |
229 | } | |
230 | ||
231 | /// accounts for toSubtract IDs removal from a subtree in the given direction of | |
232 | /// the given position | |
233 | /// \returns the number of IDs to subtract from the parent node | |
234 | Ipc::Mem::IdSet::size_type | |
235 | Ipc::Mem::IdSet::innerTruncate(const Position pos, const NavigationDirection dir, const size_type toSubtract) | |
236 | { | |
237 | auto *valuePtr = valueAddress(pos); | |
238 | auto value = IdSetInnerNode::Unpack(*valuePtr); | |
239 | size_type toSubtractNext = 0; | |
240 | if (dir == dirLeft) { | |
241 | toSubtractNext = toSubtract + value.right; | |
242 | assert(value.left >= toSubtract); | |
243 | value.left -= toSubtract; | |
244 | value.right = 0; | |
245 | } else { | |
246 | assert(dir == dirRight); | |
247 | toSubtractNext = toSubtract; | |
248 | assert(value.right >= toSubtract); | |
249 | // value.left is unchanged; we have only adjusted the right branch | |
250 | value.right -= toSubtract; | |
251 | } | |
252 | *valuePtr = value.pack(); | |
253 | return toSubtractNext; | |
254 | } | |
255 | ||
256 | /// accounts for an ID added to subtree in the given dir from the given position | |
257 | void | |
258 | Ipc::Mem::IdSet::innerPush(const Position pos, const NavigationDirection dir) | |
259 | { | |
260 | // either left or right component will be true/1; the other will be false/0 | |
261 | const auto increment = IdSetInnerNode(dir == dirLeft, dir == dirRight).pack(); | |
262 | const auto previousValue = nodeAt(pos).fetch_add(increment); | |
263 | // no overflows | |
264 | assert(previousValue <= std::numeric_limits<Node>::max() - increment); | |
265 | } | |
266 | ||
267 | /// accounts for future ID removal from a subtree of the given position | |
268 | /// \returns the direction of the subtree chosen to relinquish the ID | |
269 | Ipc::Mem::IdSet::NavigationDirection | |
270 | Ipc::Mem::IdSet::innerPop(const Position pos) | |
271 | { | |
272 | NavigationDirection direction = dirNone; | |
273 | ||
274 | auto &node = nodeAt(pos); | |
275 | auto oldValue = node.load(); | |
276 | IdSetInnerNode newValue; | |
277 | do { | |
278 | newValue = IdSetInnerNode::Unpack(oldValue); | |
279 | if (newValue.left) { | |
280 | --newValue.left; | |
281 | direction = dirLeft; | |
282 | } else if (newValue.right) { | |
283 | --newValue.right; | |
284 | direction = dirRight; | |
285 | } else { | |
286 | return dirEnd; | |
287 | } | |
288 | } while (!node.compare_exchange_weak(oldValue, newValue.pack())); | |
289 | ||
290 | assert(direction == dirLeft || direction == dirRight); | |
291 | return direction; | |
c3408a33 EB |
292 | } |
293 | ||
a5860854 | 294 | /// adds the given ID to the leaf node at the given position |
c3408a33 | 295 | void |
a5860854 | 296 | Ipc::Mem::IdSet::leafPush(const Position pos, const size_type id) |
c3408a33 | 297 | { |
a5860854 AR |
298 | const auto mask = Node(1) << (id % BitsPerLeaf); |
299 | const auto oldValue = nodeAt(pos).fetch_or(mask); | |
300 | // this was a new entry | |
301 | assert((oldValue & mask) == 0); | |
302 | } | |
303 | ||
304 | // TODO: After switching to C++20, use countr_zero() which may compile to a | |
305 | // single TZCNT assembly instruction on modern CPUs. | |
306 | /// a temporary C++20 countr_zero() replacement | |
307 | static inline | |
308 | int trailingZeros(uint64_t x) | |
309 | { | |
310 | if (!x) | |
311 | return 64; | |
312 | int count = 0; | |
313 | for (uint64_t mask = 1; !(x & mask); mask <<= 1) | |
314 | ++count; | |
315 | return count; | |
316 | } | |
317 | ||
318 | /// extracts and returns an ID from the leaf node at the given position | |
319 | Ipc::Mem::IdSet::size_type | |
320 | Ipc::Mem::IdSet::leafPop(const Position pos) | |
321 | { | |
322 | auto &node = nodeAt(pos); | |
323 | auto oldValue = node.load(); | |
324 | Node newValue; | |
325 | do { | |
326 | assert(oldValue > 0); | |
327 | const auto mask = oldValue - 1; // flips the rightmost 1 and trailing 0s | |
328 | newValue = oldValue & mask; // clears the rightmost 1 | |
329 | } while (!node.compare_exchange_weak(oldValue, newValue)); | |
330 | ||
331 | return pos.offset*BitsPerLeaf + trailingZeros(oldValue); | |
332 | } | |
333 | ||
334 | /// \returns the position of a parent node of the node at the given position | |
335 | Ipc::Mem::IdSet::Position | |
336 | Ipc::Mem::IdSet::ascend(Position pos) | |
337 | { | |
338 | assert(pos.level > 0); | |
339 | --pos.level; | |
340 | pos.offset /= 2; | |
341 | return pos; | |
342 | } | |
343 | ||
344 | /// \returns the position of a child node in the given direction of the parent | |
345 | /// node at the given position | |
346 | Ipc::Mem::IdSet::Position | |
347 | Ipc::Mem::IdSet::descend(Position pos, const NavigationDirection direction) | |
348 | { | |
349 | assert(pos.level < measurements.treeHeight); | |
350 | ++pos.level; | |
351 | ||
352 | pos.offset *= 2; | |
353 | if (direction == dirRight) | |
354 | ++pos.offset; | |
355 | else | |
356 | assert(direction == dirLeft); | |
357 | ||
358 | return pos; | |
359 | } | |
360 | ||
361 | /// \returns the atomic node (either inner or leaf) at the given position | |
362 | Ipc::Mem::IdSet::StoredNode & | |
363 | Ipc::Mem::IdSet::nodeAt(const Position pos) | |
364 | { | |
365 | assert(pos.level < measurements.treeHeight); | |
366 | // n = 2^(h+1) - 1 with h = level-1 | |
367 | const auto nodesAbove = (1U << pos.level) - 1; | |
368 | ||
369 | // the second clause is for the special case of a root node | |
370 | assert(pos.offset < nodesAbove*2 || (pos.atRoot() && nodesAbove == 0)); | |
371 | const auto nodesToTheLeft = pos.offset; | |
372 | ||
373 | const size_t nodesBefore = nodesAbove + nodesToTheLeft; | |
374 | assert(nodesBefore < measurements.nodeCount()); | |
375 | return nodes_[nodesBefore]; | |
376 | } | |
377 | ||
378 | /// \returns the location of the raw (inner or leaf) node at the given position | |
379 | Ipc::Mem::IdSet::Node * | |
380 | Ipc::Mem::IdSet::valueAddress(const Position pos) | |
381 | { | |
382 | // IdSet() constructor asserts that this frequent reinterpret_cast is safe | |
383 | return &reinterpret_cast<Node&>(nodeAt(pos)); | |
384 | } | |
385 | ||
386 | bool | |
387 | Ipc::Mem::IdSet::pop(size_type &id) | |
388 | { | |
389 | Position rootPos; | |
390 | const auto directionFromRoot = innerPop(rootPos); | |
391 | if (directionFromRoot == dirEnd) | |
392 | return false; // an empty tree | |
393 | ||
394 | auto pos = descend(rootPos, directionFromRoot); | |
395 | for (size_t level = 1; level < measurements.innerLevelCount; ++level) { | |
396 | const auto direction = innerPop(pos); | |
397 | pos = descend(pos, direction); | |
398 | } | |
399 | ||
400 | id = leafPop(pos); | |
401 | return true; | |
402 | } | |
403 | ||
404 | void | |
405 | Ipc::Mem::IdSet::push(const size_type id) | |
406 | { | |
407 | const auto offsetAtLeafLevel = id/BitsPerLeaf; | |
408 | auto pos = Position(measurements.innerLevelCount, offsetAtLeafLevel); | |
409 | leafPush(pos, id); | |
410 | ||
411 | do { | |
412 | const auto direction = pos.ascendDirection(); | |
413 | pos = ascend(pos); | |
414 | innerPush(pos, direction); | |
415 | } while (!pos.atRoot()); | |
416 | } | |
417 | ||
418 | size_t | |
419 | Ipc::Mem::IdSet::MemorySize(const size_type capacity) | |
420 | { | |
421 | const IdSetMeasurements measurements(capacity); | |
422 | // Adding sizeof(IdSet) double-counts the first node but it is better to | |
423 | // overestimate (a little) than to underestimate our memory needs due to | |
424 | // padding, new data members, etc. | |
425 | return sizeof(IdSet) + measurements.nodeCount() * sizeof(StoredNode); | |
c3408a33 EB |
426 | } |
427 | ||
428 | /* Ipc::Mem::PageStack */ | |
429 | ||
f670688c EB |
430 | Ipc::Mem::PageStack::PageStack(const Config &config): |
431 | config_(config), | |
c3408a33 | 432 | size_(0), |
f670688c | 433 | ids_(config_.capacity) |
a5860854 | 434 | { |
f670688c EB |
435 | if (config.createFull) { |
436 | ids_.makeFullBeforeSharing(); | |
437 | size_ = config_.capacity; | |
438 | } | |
2cde0303 FC |
439 | } |
440 | ||
be17aa82 | 441 | bool |
68353d5a | 442 | Ipc::Mem::PageStack::pop(PageId &page) |
be17aa82 | 443 | { |
c3408a33 EB |
444 | assert(!page); |
445 | ||
f670688c | 446 | if (!config_.capacity) |
a5860854 | 447 | return false; |
c3408a33 | 448 | |
a5860854 AR |
449 | IdSet::size_type pageIndex = 0; |
450 | if (!ids_.pop(pageIndex)) | |
451 | return false; | |
c3408a33 EB |
452 | |
453 | // must decrement after removing the page to avoid underflow | |
454 | const auto newSize = --size_; | |
f670688c | 455 | assert(newSize < config_.capacity); |
c3408a33 | 456 | |
a5860854 | 457 | page.number = pageIndex + 1; |
f670688c | 458 | page.pool = config_.poolId; |
c3408a33 | 459 | debugs(54, 8, page << " size: " << newSize); |
a5860854 | 460 | assert(pageIdIsValid(page)); |
c3408a33 | 461 | return true; |
be17aa82 AR |
462 | } |
463 | ||
464 | void | |
68353d5a | 465 | Ipc::Mem::PageStack::push(PageId &page) |
be17aa82 | 466 | { |
c3408a33 EB |
467 | debugs(54, 8, page); |
468 | assert(page); | |
469 | assert(pageIdIsValid(page)); | |
470 | ||
c3408a33 EB |
471 | // must increment before inserting the page to avoid underflow in pop() |
472 | const auto newSize = ++size_; | |
f670688c | 473 | assert(newSize <= config_.capacity); |
c3408a33 | 474 | |
a5860854 AR |
475 | const auto pageIndex = page.number - 1; |
476 | ids_.push(pageIndex); | |
c3408a33 EB |
477 | |
478 | debugs(54, 8, page << " size: " << newSize); | |
479 | page = PageId(); | |
be17aa82 AR |
480 | } |
481 | ||
68353d5a DK |
482 | bool |
483 | Ipc::Mem::PageStack::pageIdIsValid(const PageId &page) const | |
be17aa82 | 484 | { |
f670688c | 485 | return page.pool == config_.poolId && |
c3408a33 | 486 | 0 < page.number && page.number <= capacity(); |
68353d5a DK |
487 | } |
488 | ||
489 | size_t | |
490 | Ipc::Mem::PageStack::sharedMemorySize() const | |
491 | { | |
f670688c | 492 | return SharedMemorySize(config_); |
be17aa82 | 493 | } |
2d0647fb DK |
494 | |
495 | size_t | |
f670688c | 496 | Ipc::Mem::PageStack::SharedMemorySize(const Config &cfg) |
2d0647fb | 497 | { |
b454559b | 498 | const auto levelsSize = PageId::maxPurpose * sizeof(Levels_t); |
f670688c EB |
499 | const size_t pagesDataSize = cfg.capacity * cfg.pageSize; |
500 | return StackSize(cfg.capacity) + pagesDataSize + levelsSize; | |
551f8a18 DK |
501 | } |
502 | ||
503 | size_t | |
c3408a33 | 504 | Ipc::Mem::PageStack::StackSize(const PageCount capacity) |
551f8a18 | 505 | { |
a5860854 AR |
506 | // Adding sizeof(PageStack) double-counts the fixed portion of the ids_ data |
507 | // member but it is better to overestimate (a little) than to underestimate | |
508 | // our memory needs due to padding, new data members, etc. | |
509 | return sizeof(PageStack) + IdSet::MemorySize(capacity); | |
551f8a18 DK |
510 | } |
511 | ||
512 | size_t | |
513 | Ipc::Mem::PageStack::stackSize() const | |
514 | { | |
f670688c | 515 | return StackSize(config_.capacity); |
2d0647fb | 516 | } |
f53969cc | 517 | |
2a10e977 | 518 | size_t |
c3408a33 | 519 | Ipc::Mem::PageStack::LevelsPaddingSize(const PageCount capacity) |
2a10e977 | 520 | { |
521 | const auto displacement = StackSize(capacity) % alignof(Levels_t); | |
522 | return displacement ? alignof(Levels_t) - displacement : 0; | |
523 | } | |
524 |