/*
 * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 20    Storage Manager Swapout Functions */

#include "squid.h"
#include "base/IoManip.h"
#include "cbdata.h"
#include "CollapsedForwarding.h"
#include "globals.h"
#include "Store.h"
#include "StoreClient.h"
// TODO: Abstract the use of this more
#include "mem_node.h"
#include "MemObject.h"
#include "SquidConfig.h"
#include "StatCounters.h"
#include "store/Disk.h"
#include "store/Disks.h"
#include "store_log.h"
#include "swap_log_op.h"

static void storeSwapOutStart(StoreEntry * e);
static StoreIOState::STIOCB storeSwapOutFileClosed;

// wrapper to cross C/C++ ABI boundary. xfree is extern "C" for libraries.
static void xfree_cppwrapper(void *x)
{
    xfree(x);
}

/* start swapping object to disk */
static void
storeSwapOutStart(StoreEntry * e)
{
    MemObject *mem = e->mem_obj;
    StoreIOState::Pointer sio;
    assert(mem);
    /* Build the swap metadata, so the filesystem will know how much
     * metadata there is to store
     */
    debugs(20, 5, "storeSwapOutStart: Begin SwapOut '" << e->url() << "' to dirno " <<
           e->swap_dirn << ", fileno " << asHex(e->swap_filen).upperCase().minDigits(8));
    /* If we start swapping out objects with OutOfBand Metadata,
     * then this code needs changing
     */

    /* TODO: make some sort of data,size refcounted immutable buffer
     * and stop fooling ourselves with "const char*" buffers.
     */

    // Create metadata now, possibly in vain: storeCreate needs swap_hdr_sz.
    const auto buf = e->getSerialisedMetaData(mem->swap_hdr_sz);
    assert(buf);

    /* Create the swap file */
    generic_cbdata *c = new generic_cbdata(e);
    sio = storeCreate(e, storeSwapOutFileClosed, c);

    if (sio == nullptr) {
        assert(!e->hasDisk());
        e->swap_status = SWAPOUT_NONE;
        e->swapOutDecision(MemObject::SwapOut::swImpossible);
        delete c;
        xfree((char*)buf);
        storeLog(STORE_LOG_SWAPOUTFAIL, e);
        return;
    }

    mem->swapout.sio = sio;
    /* Don't lock until after create, or the replacement
     * code might get confused */

    e->lock("storeSwapOutStart");
    /* Pick up the file number if it was assigned immediately */
    e->attachToDisk(mem->swapout.sio->swap_dirn, mem->swapout.sio->swap_filen, SWAPOUT_WRITING);

    e->swapOutDecision(MemObject::SwapOut::swStarted); // after SWAPOUT_WRITING

    /* write out the swap metadata */
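    // The metadata occupies the first swap_hdr_sz bytes of the swap file
    // (hence offset 0); the object data written by doPages() goes after it.
    // storeIOWrite() takes ownership of buf, freeing it via the callback.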
    storeIOWrite(mem->swapout.sio, buf, mem->swap_hdr_sz, 0, xfree_cppwrapper);
}

static bool
doPages(StoreEntry *anEntry)
{
    MemObject *mem = anEntry->mem_obj;

    do {
        // find the page containing the first byte we have not swapped out yet
        mem_node *page =
            mem->data_hdr.getBlockContainingLocation(mem->swapout.queue_offset);

        if (!page)
            break; // wait for more data to become available

        // memNodeWriteComplete() and absence of buffer offset math below
        // imply that we always write from the very beginning of the page
        assert(page->start() == mem->swapout.queue_offset);

        /*
         * Get the length of this buffer. We are assuming(!) that the buffer
         * length won't change on this buffer, or things are going to be very
         * strange. I think that after the copy to a buffer is done, the buffer
         * size should stay fixed regardless so that this code isn't confused,
         * but we can look at this at a later date or whenever the code results
         * in bad swapouts, whichever happens first. :-)
         */
        ssize_t swap_buf_len = page->nodeBuffer.length;

        debugs(20, 3, "storeSwapOut: swap_buf_len = " << swap_buf_len);

        assert(swap_buf_len > 0);

        debugs(20, 3, "storeSwapOut: swapping out " << swap_buf_len << " bytes from " << mem->swapout.queue_offset);

        mem->swapout.queue_offset += swap_buf_len;

        // Quit if write() fails. Sio is going to call our callback, and that
        // will cleanup, but, depending on the fs, that call may be async.
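        // The -1 offset leaves positioning to the store layer, which
        // continues at its current write position; doPages() only ever
        // writes sequentially, page by page.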
        const bool ok = mem->swapout.sio->write(
                            mem->data_hdr.NodeGet(page),
                            swap_buf_len,
                            -1,
                            memNodeWriteComplete);

        if (!ok || !anEntry->swappingOut())
            return false;

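        // bytes buffered in memory but not yet queued for writing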
        int64_t swapout_size = mem->endOffset() - mem->swapout.queue_offset;

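        // While the entry is still being filled, wait until at least a full
        // page accumulates; once the entry is complete (STORE_OK), drain any
        // partial page too.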
        if (anEntry->store_status == STORE_PENDING)
            if (swapout_size < SM_PAGE_SIZE)
                break;

        if (swapout_size <= 0)
            break;
    } while (true);

    // either wait for more data or call swapOutFileClose()
    return true;
}

/* This routine is called every time data is sent to the client side.
 * Its overhead is, therefore, significant.
 */
void
StoreEntry::swapOut()
{
    if (!mem_obj)
        return;

    // this flag may change so we must check even if we are swappingOut
    if (EBIT_TEST(flags, ENTRY_ABORTED)) {
        assert(EBIT_TEST(flags, RELEASE_REQUEST));
        // StoreEntry::abort() already closed the swap out file, if any
        // no trimming: data producer must stop production if ENTRY_ABORTED
        return;
    }

    const bool weAreOrMayBeSwappingOut = swappingOut() || mayStartSwapOut();

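    // let the memory store cache and/or trim this entry, knowing whether a
    // disk copy exists or may yet be created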
    Store::Root().memoryOut(*this, weAreOrMayBeSwappingOut);

    if (mem_obj->swapout.decision < MemObject::SwapOut::swPossible)
        return; // decided not to write to disk (at least for now)

    if (!weAreOrMayBeSwappingOut)
        return; // finished writing to disk after an earlier swStarted decision

    // Aborted entries have STORE_OK, but mayStartSwapOut() rejects them. Thus,
    // store_status == STORE_OK below means we got everything we wanted.

    debugs(20, 7, "storeSwapOut: mem->inmem_lo = " << mem_obj->inmem_lo);
    debugs(20, 7, "storeSwapOut: mem->endOffset() = " << mem_obj->endOffset());
    debugs(20, 7, "storeSwapOut: swapout.queue_offset = " << mem_obj->swapout.queue_offset);

    if (mem_obj->swapout.sio != nullptr)
        debugs(20, 7, "storeSwapOut: storeOffset() = " << mem_obj->swapout.sio->offset() );

    int64_t const lowest_offset = mem_obj->lowestMemReaderOffset();

    debugs(20, 7, "storeSwapOut: lowest_offset = " << lowest_offset);

#if SIZEOF_OFF_T <= 4

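    // 0x7FFF0000 stops just short of the 0x7FFFFFFF maximum of a signed
    // 32-bit off_t, leaving 64 KiB of headroom before offsets would overflow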
    if (mem_obj->endOffset() > 0x7FFF0000) {
        debugs(20, DBG_CRITICAL, "WARNING: preventing off_t overflow for " << url());
        abort();
        return;
    }

#endif
    if (swappingOut())
        assert(mem_obj->inmem_lo <= mem_obj->objectBytesOnDisk() );

    // buffered bytes we have not swapped out yet
    const int64_t swapout_maxsize = mem_obj->availableForSwapOut();
    assert(swapout_maxsize >= 0);
    debugs(20, 7, "storeSwapOut: swapout_size = " << swapout_maxsize);

    if (swapout_maxsize == 0) { // swapped everything we got
        if (store_status == STORE_OK) { // got everything we wanted
            assert(mem_obj->object_sz >= 0);
            swapOutFileClose(StoreIOState::wroteAll);
        }
        // else need more data to swap out
        return;
    }

    if (store_status == STORE_PENDING) {
        /* wait for a full block to write */

        if (swapout_maxsize < SM_PAGE_SIZE)
            return;

        /*
         * Wait until we are below the disk FD limit, only if the
         * next read won't be deferred.
         */
        if (storeTooManyDiskFilesOpen() && !checkDeferRead(-1))
            return;
    }

    /* Ok, we have stuff to swap out. Is there a swapout.sio open? */
    if (!hasDisk()) {
        assert(mem_obj->swapout.sio == nullptr);
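        // a disk copy must start at byte 0, so no trimming may have occurred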
        assert(mem_obj->inmem_lo == 0);
        storeSwapOutStart(this); // sets SwapOut::swImpossible on failures
    }

    if (mem_obj->swapout.sio == nullptr)
        return;

    if (!doPages(this))
        /* oops, we're not swapping out any more */
        return;

    if (store_status == STORE_OK) {
        /*
         * If the state is STORE_OK, then all data must have been given
         * to the filesystem at this point because storeSwapOut() is
         * not going to be called again for this entry.
         */
        assert(mem_obj->object_sz >= 0);
        assert(mem_obj->endOffset() == mem_obj->swapout.queue_offset);
        swapOutFileClose(StoreIOState::wroteAll);
    }
}

void
StoreEntry::swapOutFileClose(int how)
{
    assert(mem_obj != nullptr);
    debugs(20, 3, "storeSwapOutFileClose: " << getMD5Text() << " how=" << how);
    debugs(20, 3, "storeSwapOutFileClose: sio = " << mem_obj->swapout.sio.getRaw());

    if (mem_obj->swapout.sio == nullptr)
        return;

    storeClose(mem_obj->swapout.sio, how);
}

static void
storeSwapOutFileClosed(void *data, int errflag, StoreIOState::Pointer self)
{
    StoreEntry *e;
    static_cast<generic_cbdata *>(data)->unwrap(&e);

    MemObject *mem = e->mem_obj;
    assert(mem->swapout.sio == self);
    assert(e->swappingOut());

    // if object_size is still unknown, the entry was probably aborted
    if (errflag || e->objectLen() < 0) {
        debugs(20, 2, "storeSwapOutFileClosed: dirno " << e->swap_dirn << ", swapfile " <<
               asHex(e->swap_filen).upperCase().minDigits(8) <<
               ", errflag=" << errflag);

        if (errflag == DISK_NO_SPACE_LEFT) {
            /* TODO: this should be handled by the link from store IO to
             * Store, rather than being a top level API call.
             */
            e->disk().diskFull();
            storeConfigure();
        }

        // mark the locked entry for deletion
        // TODO: Keep the memory entry (if any)
        e->releaseRequest();
        e->swap_status = SWAPOUT_FAILED;
        e->disk().finalizeSwapoutFailure(*e);
    } else {
        /* swapping complete */
        debugs(20, 3, "storeSwapOutFileClosed: SwapOut complete: '" << e->url() << "' to " <<
               e->swap_dirn << ", " << asHex(e->swap_filen).upperCase().minDigits(8));
        debugs(20, 5, "swap_file_sz = " <<
               e->objectLen() << " + " << mem->swap_hdr_sz);

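        // total on-disk size: the object body plus the serialized metadata
        // header written by storeSwapOutStart()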
        e->swap_file_sz = e->objectLen() + mem->swap_hdr_sz;
        e->swap_status = SWAPOUT_DONE;
        e->disk().finalizeSwapoutSuccess(*e);

        // XXX: For some Stores, it is pointless to re-check cachability here
        // and it leads to double counts in store_check_cachable_hist. We need
        // another way to signal a completed but failed swapout. Or, better,
        // each Store should handle its own logging and LOG state setting.
        if (e->checkCachable()) {
            storeLog(STORE_LOG_SWAPOUT, e);
            storeDirSwapLog(e, SWAP_LOG_ADD);
        }

        ++statCounter.swap.outs;
    }

    debugs(20, 3, "storeSwapOutFileClosed: " << __FILE__ << ":" << __LINE__);
    mem->swapout.sio = nullptr;
    e->storeWriterDone(); // after updating swap_status
    e->unlock("storeSwapOutFileClosed");
}

bool
StoreEntry::mayStartSwapOut()
{
    // must be checked in the caller
    assert(!EBIT_TEST(flags, ENTRY_ABORTED));
    assert(!swappingOut());

    if (!Config.cacheSwap.n_configured)
        return false;

    assert(mem_obj);
    const MemObject::SwapOut::Decision &decision = mem_obj->swapout.decision;

    // if we decided that starting is not possible, do not repeat same checks
    if (decision == MemObject::SwapOut::swImpossible) {
        debugs(20, 3, " already rejected");
        return false;
    }

    // If we have started swapping out, do not start over. Most likely, we have
    // finished swapping out by now because we are not currently swappingOut().
    if (decision == MemObject::SwapOut::swStarted) {
        debugs(20, 3, "already started");
        return false;
    }

    if (shutting_down) {
        debugs(20, 3, "avoid heavy optional work during shutdown");
        swapOutDecision(MemObject::SwapOut::swImpossible);
        return false;
    }

    // if there is a usable disk entry already, do not start over
    if (hasDisk() || Store::Root().hasReadableDiskEntry(*this)) {
        debugs(20, 3, "already did"); // we or somebody else created that entry
        swapOutDecision(MemObject::SwapOut::swImpossible);
        return false;
    }

    if (Store::Root().markedForDeletionAndAbandoned(*this)) {
        debugs(20, 3, "marked for deletion and abandoned");
        swapOutDecision(MemObject::SwapOut::swImpossible);
        return false;
    }

    // if we decided that swapout is possible, do not repeat same checks
    if (decision == MemObject::SwapOut::swPossible) {
        debugs(20, 3, "already allowed");
        return true;
    }

    // To avoid SMP workers releasing each other's caching attempts, restrict
    // disk caching to the StoreEntry publisher. This check goes before
    // checkCachable(), which may incorrectly release() the publisher's entry.
    if (Store::Root().transientsReader(*this)) {
        debugs(20, 5, "yield to entry publisher");
        swapOutDecision(MemObject::SwapOut::swImpossible);
        return false;
    }

    if (!checkCachable()) {
        debugs(20, 3, "not cachable");
        swapOutDecision(MemObject::SwapOut::swImpossible);
        return false;
    }

    if (EBIT_TEST(flags, ENTRY_SPECIAL)) {
        debugs(20, 3, url() << " SPECIAL");
        swapOutDecision(MemObject::SwapOut::swImpossible);
        return false;
    }

    if (mem_obj->inmem_lo > 0) {
        debugs(20, 3, "storeSwapOut: (inmem_lo > 0) inmem_lo:" << mem_obj->inmem_lo);
        swapOutDecision(MemObject::SwapOut::swImpossible);
        return false;
    }

    if (!mem_obj->isContiguous()) {
        debugs(20, 3, "storeSwapOut: not Contiguous");
        swapOutDecision(MemObject::SwapOut::swImpossible);
        return false;
    }

    // handle store_maxobjsize limit
    {
        // TODO: add estimated store metadata size to be conservative

        // use guaranteed maximum if it is known
        const int64_t expectedEnd = mem_obj->expectedReplySize();
        debugs(20, 7, "expectedEnd = " << expectedEnd);
        if (expectedEnd > store_maxobjsize) {
            debugs(20, 3, "will not fit: " << expectedEnd <<
                   " > " << store_maxobjsize);
            swapOutDecision(MemObject::SwapOut::swImpossible);
            return false; // known to outgrow the limit eventually
        }

        // use current minimum (always known)
        const int64_t currentEnd = mem_obj->endOffset();
        if (currentEnd > store_maxobjsize) {
            debugs(20, 3, "does not fit: " << currentEnd <<
                   " > " << store_maxobjsize);
            swapOutDecision(MemObject::SwapOut::swImpossible);
            return false; // already does not fit and may only get bigger
        }

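        // Store::Root().accumulateMore() reports (roughly) how many more
        // bytes this entry may be allowed to accumulate before a swap-out
        // decision has to be made.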
        // prevent final default swPossible answer for yet unknown length
        if (expectedEnd < 0 && store_status != STORE_OK) {
            const int64_t more = Store::Root().accumulateMore(*this);
            if (more > 0) {
                debugs(20, 5, "got " << currentEnd << "; defer decision for " << more << " more bytes");
                return true; // may still fit, but no final decision yet
            }
        }
    }

    swapOutDecision(MemObject::SwapOut::swPossible);
    return true;
}