]>
Commit | Line | Data |
---|---|---|
e2851fe7 AR |
1 | /* |
2 | * $Id$ | |
3 | * | |
4 | * DEBUG: section 47 Store Directory Routines | |
5 | */ | |
6 | ||
f7f3304a | 7 | #include "squid.h" |
43ebbac3 | 8 | #include "ConfigOption.h" |
e2851fe7 AR |
9 | #include "DiskIO/DiskIOModule.h" |
10 | #include "DiskIO/DiskIOStrategy.h" | |
11 | #include "DiskIO/ReadRequest.h" | |
12 | #include "DiskIO/WriteRequest.h" | |
13 | #include "fs/rock/RockSwapDir.h" | |
14 | #include "fs/rock/RockIoState.h" | |
15 | #include "fs/rock/RockIoRequests.h" | |
16 | #include "fs/rock/RockRebuild.h" | |
e0bdae60 | 17 | #include "ipc/mem/Pages.h" |
f5adb654 AR |
18 | #include "MemObject.h" |
19 | #include "Parsing.h" | |
582c2af2 | 20 | #include "protos.h" |
f5adb654 | 21 | #include "SquidMath.h" |
58373ff8 | 22 | #include <cstdlib> |
f5adb654 | 23 | #include <iomanip> |
e2851fe7 | 24 | |
582c2af2 FC |
25 | #if HAVE_SYS_STAT_H |
26 | #include <sys/stat.h> | |
27 | #endif | |
28 | ||
e2851fe7 AR |
29 | const int64_t Rock::SwapDir::HeaderSize = 16*1024; |
30 | ||
902df398 | 31 | Rock::SwapDir::SwapDir(): ::SwapDir("rock"), filePath(NULL), io(NULL), map(NULL) |
e2851fe7 AR |
32 | { |
33 | } | |
34 | ||
35 | Rock::SwapDir::~SwapDir() | |
36 | { | |
37 | delete io; | |
f1eaa254 | 38 | delete map; |
e2851fe7 AR |
39 | safe_free(filePath); |
40 | } | |
41 | ||
42 | StoreSearch * | |
43 | Rock::SwapDir::search(String const url, HttpRequest *) | |
44 | { | |
9199139f AR |
45 | assert(false); |
46 | return NULL; // XXX: implement | |
e2851fe7 AR |
47 | } |
48 | ||
79672f4f AR |
49 | void |
50 | Rock::SwapDir::get(String const key, STOREGETCLIENT cb, void *data) | |
51 | { | |
52 | ::SwapDir::get(key, cb, data); | |
53 | } | |
54 | ||
f1debb5e DK |
55 | // called when Squid core needs a StoreEntry with a given key |
56 | StoreEntry * | |
57 | Rock::SwapDir::get(const cache_key *key) | |
58 | { | |
0a11e039 | 59 | if (!map || !theFile || !theFile->canRead()) |
8abe1173 DK |
60 | return NULL; |
61 | ||
5b3ea321 DK |
62 | sfileno filen; |
63 | const Ipc::StoreMapSlot *const slot = map->openForReading(key, filen); | |
44c95fcf | 64 | if (!slot) |
f1debb5e DK |
65 | return NULL; |
66 | ||
44c95fcf AR |
67 | const Ipc::StoreMapSlot::Basics &basics = slot->basics; |
68 | ||
f1debb5e DK |
69 | // create a brand new store entry and initialize it with stored basics |
70 | StoreEntry *e = new StoreEntry(); | |
71 | e->lock_count = 0; | |
72 | e->swap_dirn = index; | |
5b3ea321 | 73 | e->swap_filen = filen; |
44c95fcf AR |
74 | e->swap_file_sz = basics.swap_file_sz; |
75 | e->lastref = basics.lastref; | |
76 | e->timestamp = basics.timestamp; | |
77 | e->expires = basics.expires; | |
78 | e->lastmod = basics.lastmod; | |
79 | e->refcount = basics.refcount; | |
80 | e->flags = basics.flags; | |
f1debb5e DK |
81 | e->store_status = STORE_OK; |
82 | e->setMemStatus(NOT_IN_MEMORY); | |
83 | e->swap_status = SWAPOUT_DONE; | |
84 | e->ping_status = PING_NONE; | |
85 | EBIT_SET(e->flags, ENTRY_CACHABLE); | |
86 | EBIT_CLR(e->flags, RELEASE_REQUEST); | |
87 | EBIT_CLR(e->flags, KEY_PRIVATE); | |
88 | EBIT_SET(e->flags, ENTRY_VALIDATED); | |
e1825c5d | 89 | e->hashInsert(key); |
f1debb5e DK |
90 | trackReferences(*e); |
91 | ||
92 | return e; | |
93 | // the disk entry remains open for reading, protected from modifications | |
94 | } | |
95 | ||
f58bb2f4 | 96 | void Rock::SwapDir::disconnect(StoreEntry &e) |
6d8d05b5 | 97 | { |
f58bb2f4 | 98 | assert(e.swap_dirn == index); |
6d8d05b5 | 99 | assert(e.swap_filen >= 0); |
f58bb2f4 AR |
100 | // cannot have SWAPOUT_NONE entry with swap_filen >= 0 |
101 | assert(e.swap_status != SWAPOUT_NONE); | |
102 | ||
103 | // do not rely on e.swap_status here because there is an async delay | |
104 | // before it switches from SWAPOUT_WRITING to SWAPOUT_DONE. | |
105 | ||
106 | // since e has swap_filen, its slot is locked for either reading or writing | |
107 | map->abortIo(e.swap_filen); | |
6d8d05b5 DK |
108 | e.swap_dirn = -1; |
109 | e.swap_filen = -1; | |
f58bb2f4 | 110 | e.swap_status = SWAPOUT_NONE; |
6d8d05b5 DK |
111 | } |
112 | ||
39c1e1d9 DK |
113 | uint64_t |
114 | Rock::SwapDir::currentSize() const | |
115 | { | |
57f583f1 | 116 | return HeaderSize + max_objsize * currentCount(); |
39c1e1d9 DK |
117 | } |
118 | ||
119 | uint64_t | |
120 | Rock::SwapDir::currentCount() const | |
121 | { | |
122 | return map ? map->entryCount() : 0; | |
123 | } | |
124 | ||
125 | /// In SMP mode only the disker process reports stats to avoid | |
126 | /// counting the same stats by multiple processes. | |
127 | bool | |
128 | Rock::SwapDir::doReportStat() const | |
129 | { | |
130 | return ::SwapDir::doReportStat() && (!UsingSmp() || IamDiskProcess()); | |
131 | } | |
132 | ||
da9d3191 DK |
133 | void |
134 | Rock::SwapDir::swappedOut(const StoreEntry &) | |
135 | { | |
136 | // stats are not stored but computed when needed | |
137 | } | |
138 | ||
b3165da6 DK |
139 | int64_t |
140 | Rock::SwapDir::entryLimitAllowed() const | |
141 | { | |
142 | const int64_t eLimitLo = map ? map->entryLimit() : 0; // dynamic shrinking unsupported | |
cc34568d | 143 | const int64_t eWanted = (maxSize() - HeaderSize)/maxObjectSize(); |
b3165da6 DK |
144 | return min(max(eLimitLo, eWanted), entryLimitHigh()); |
145 | } | |
146 | ||
e2851fe7 AR |
147 | // TODO: encapsulate as a tool; identical to CossSwapDir::create() |
148 | void | |
149 | Rock::SwapDir::create() | |
150 | { | |
151 | assert(path); | |
152 | assert(filePath); | |
153 | ||
984d890b AR |
154 | if (UsingSmp() && !IamDiskProcess()) { |
155 | debugs (47,3, HERE << "disker will create in " << path); | |
156 | return; | |
157 | } | |
158 | ||
e2851fe7 AR |
159 | debugs (47,3, HERE << "creating in " << path); |
160 | ||
161 | struct stat swap_sb; | |
162 | if (::stat(path, &swap_sb) < 0) { | |
51618c6a | 163 | debugs (47, DBG_IMPORTANT, "Creating Rock db directory: " << path); |
e2851fe7 | 164 | const int res = mkdir(path, 0700); |
e2851fe7 | 165 | if (res != 0) { |
51618c6a | 166 | debugs(47, DBG_CRITICAL, "Failed to create Rock db dir " << path << |
9199139f | 167 | ": " << xstrerror()); |
e2851fe7 | 168 | fatal("Rock Store db creation error"); |
9199139f AR |
169 | } |
170 | } | |
e2851fe7 AR |
171 | |
172 | #if SLOWLY_FILL_WITH_ZEROS | |
cc34568d DK |
173 | char block[1024]; |
174 | Must(maxSize() % sizeof(block) == 0); | |
e2851fe7 AR |
175 | memset(block, '\0', sizeof(block)); |
176 | ||
177 | const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600); | |
cc34568d | 178 | for (off_t offset = 0; offset < maxSize(); offset += sizeof(block)) { |
e2851fe7 | 179 | if (write(swap, block, sizeof(block)) != sizeof(block)) { |
f5adb654 | 180 | debugs(47, DBG_CRITICAL, "ERROR: Failed to create Rock Store db in " << filePath << |
9199139f | 181 | ": " << xstrerror()); |
e2851fe7 | 182 | fatal("Rock Store db creation error"); |
9199139f AR |
183 | } |
184 | } | |
e2851fe7 AR |
185 | close(swap); |
186 | #else | |
187 | const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600); | |
188 | if (swap < 0) { | |
f5adb654 | 189 | debugs(47, DBG_CRITICAL, "ERROR: Failed to initialize Rock Store db in " << filePath << |
9199139f | 190 | "; create error: " << xstrerror()); |
e2851fe7 AR |
191 | fatal("Rock Store db creation error"); |
192 | } | |
193 | ||
cc34568d | 194 | if (ftruncate(swap, maxSize()) != 0) { |
f5adb654 | 195 | debugs(47, DBG_CRITICAL, "ERROR: Failed to initialize Rock Store db in " << filePath << |
9199139f | 196 | "; truncate error: " << xstrerror()); |
e2851fe7 AR |
197 | fatal("Rock Store db creation error"); |
198 | } | |
199 | ||
200 | char header[HeaderSize]; | |
201 | memset(header, '\0', sizeof(header)); | |
202 | if (write(swap, header, sizeof(header)) != sizeof(header)) { | |
f5adb654 | 203 | debugs(47, DBG_CRITICAL, "ERROR: Failed to initialize Rock Store db in " << filePath << |
9199139f | 204 | "; write error: " << xstrerror()); |
e2851fe7 AR |
205 | fatal("Rock Store db initialization error"); |
206 | } | |
207 | close(swap); | |
208 | #endif | |
e2851fe7 AR |
209 | } |
210 | ||
211 | void | |
212 | Rock::SwapDir::init() | |
213 | { | |
214 | debugs(47,2, HERE); | |
215 | ||
216 | // XXX: SwapDirs aren't refcounted. We make IORequestor calls, which | |
217 | // are refcounted. We up our count once to avoid implicit delete's. | |
218 | RefCountReference(); | |
219 | ||
902df398 DK |
220 | Must(!map); |
221 | map = new DirMap(path); | |
f7091279 | 222 | |
3b581957 | 223 | const char *ioModule = needsDiskStrand() ? "IpcIo" : "Blocking"; |
c03c2bad AR |
224 | if (DiskIOModule *m = DiskIOModule::Find(ioModule)) { |
225 | debugs(47,2, HERE << "Using DiskIO module: " << ioModule); | |
226 | io = m->createStrategy(); | |
227 | io->init(); | |
228 | } else { | |
51618c6a AR |
229 | debugs(47, DBG_CRITICAL, "FATAL: Rock store is missing DiskIO module: " << |
230 | ioModule); | |
c03c2bad AR |
231 | fatal("Rock Store missing a required DiskIO module"); |
232 | } | |
e2851fe7 AR |
233 | |
234 | theFile = io->newFile(filePath); | |
43ebbac3 | 235 | theFile->configure(fileConfig); |
e2851fe7 | 236 | theFile->open(O_RDWR, 0644, this); |
078274f6 AR |
237 | |
238 | // Increment early. Otherwise, if one SwapDir finishes rebuild before | |
239 | // others start, storeRebuildComplete() will think the rebuild is over! | |
240 | // TODO: move store_dirs_rebuilding hack to store modules that need it. | |
241 | ++StoreController::store_dirs_rebuilding; | |
e2851fe7 AR |
242 | } |
243 | ||
14911a4e AR |
244 | bool |
245 | Rock::SwapDir::needsDiskStrand() const | |
246 | { | |
3b581957 DK |
247 | const bool wontEvenWorkWithoutDisker = Config.workers > 1; |
248 | const bool wouldWorkBetterWithDisker = DiskIOModule::Find("IpcIo"); | |
249 | return InDaemonMode() && (wontEvenWorkWithoutDisker || | |
e29ccb57 | 250 | wouldWorkBetterWithDisker); |
14911a4e AR |
251 | } |
252 | ||
e2851fe7 AR |
253 | void |
254 | Rock::SwapDir::parse(int anIndex, char *aPath) | |
255 | { | |
256 | index = anIndex; | |
257 | ||
258 | path = xstrdup(aPath); | |
259 | ||
260 | // cache store is located at path/db | |
261 | String fname(path); | |
262 | fname.append("/rock"); | |
263 | filePath = xstrdup(fname.termedBuf()); | |
264 | ||
24063512 | 265 | parseSize(false); |
e2851fe7 AR |
266 | parseOptions(0); |
267 | ||
f428c9c4 AR |
268 | // Current openForWriting() code overwrites the old slot if needed |
269 | // and possible, so proactively removing old slots is probably useless. | |
0e240235 | 270 | assert(!repl); // repl = createRemovalPolicy(Config.replPolicy); |
e2851fe7 AR |
271 | |
272 | validateOptions(); | |
273 | } | |
274 | ||
275 | void | |
c6059970 | 276 | Rock::SwapDir::reconfigure() |
e2851fe7 | 277 | { |
24063512 | 278 | parseSize(true); |
e2851fe7 AR |
279 | parseOptions(1); |
280 | // TODO: can we reconfigure the replacement policy (repl)? | |
281 | validateOptions(); | |
282 | } | |
283 | ||
284 | /// parse maximum db disk size | |
285 | void | |
24063512 | 286 | Rock::SwapDir::parseSize(const bool reconfiguring) |
e2851fe7 | 287 | { |
cc34568d DK |
288 | const int i = GetInteger(); |
289 | if (i < 0) | |
e2851fe7 | 290 | fatal("negative Rock cache_dir size value"); |
24063512 DK |
291 | const uint64_t new_max_size = |
292 | static_cast<uint64_t>(i) << 20; // MBytes to Bytes | |
293 | if (!reconfiguring) | |
294 | max_size = new_max_size; | |
295 | else if (new_max_size != max_size) { | |
296 | debugs(3, DBG_IMPORTANT, "WARNING: cache_dir '" << path << "' size " | |
297 | "cannot be changed dynamically, value left unchanged (" << | |
298 | (max_size >> 20) << " MB)"); | |
299 | } | |
e2851fe7 AR |
300 | } |
301 | ||
43ebbac3 AR |
302 | ConfigOption * |
303 | Rock::SwapDir::getOptionTree() const | |
304 | { | |
305 | ConfigOptionVector *vector = dynamic_cast<ConfigOptionVector*>(::SwapDir::getOptionTree()); | |
306 | assert(vector); | |
307 | vector->options.push_back(new ConfigOptionAdapter<SwapDir>(*const_cast<SwapDir *>(this), &SwapDir::parseTimeOption, &SwapDir::dumpTimeOption)); | |
df881a0f | 308 | vector->options.push_back(new ConfigOptionAdapter<SwapDir>(*const_cast<SwapDir *>(this), &SwapDir::parseRateOption, &SwapDir::dumpRateOption)); |
43ebbac3 AR |
309 | return vector; |
310 | } | |
311 | ||
24063512 DK |
312 | bool |
313 | Rock::SwapDir::allowOptionReconfigure(const char *const option) const | |
314 | { | |
315 | return strcmp(option, "max-size") != 0 && | |
16fea83b | 316 | ::SwapDir::allowOptionReconfigure(option); |
24063512 DK |
317 | } |
318 | ||
43ebbac3 AR |
319 | /// parses time-specific options; mimics ::SwapDir::optionObjectSizeParse() |
320 | bool | |
321 | Rock::SwapDir::parseTimeOption(char const *option, const char *value, int reconfiguring) | |
322 | { | |
323 | // TODO: ::SwapDir or, better, Config should provide time-parsing routines, | |
324 | // including time unit handling. Same for size. | |
325 | ||
326 | time_msec_t *storedTime; | |
327 | if (strcmp(option, "swap-timeout") == 0) | |
328 | storedTime = &fileConfig.ioTimeout; | |
329 | else | |
330 | return false; | |
331 | ||
332 | if (!value) | |
333 | self_destruct(); | |
334 | ||
df881a0f AR |
335 | // TODO: handle time units and detect parsing errors better |
336 | const int64_t parsedValue = strtoll(value, NULL, 10); | |
337 | if (parsedValue < 0) { | |
338 | debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must not be negative but is: " << parsedValue); | |
43ebbac3 AR |
339 | self_destruct(); |
340 | } | |
341 | ||
df881a0f AR |
342 | const time_msec_t newTime = static_cast<time_msec_t>(parsedValue); |
343 | ||
7846d084 DK |
344 | if (!reconfiguring) |
345 | *storedTime = newTime; | |
346 | else if (*storedTime != newTime) { | |
347 | debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option | |
348 | << " cannot be changed dynamically, value left unchanged: " << | |
349 | *storedTime); | |
350 | } | |
43ebbac3 AR |
351 | |
352 | return true; | |
353 | } | |
354 | ||
355 | /// reports time-specific options; mimics ::SwapDir::optionObjectSizeDump() | |
356 | void | |
357 | Rock::SwapDir::dumpTimeOption(StoreEntry * e) const | |
358 | { | |
359 | if (fileConfig.ioTimeout) | |
c91ca3ce | 360 | storeAppendPrintf(e, " swap-timeout=%" PRId64, |
43ebbac3 AR |
361 | static_cast<int64_t>(fileConfig.ioTimeout)); |
362 | } | |
363 | ||
df881a0f AR |
364 | /// parses rate-specific options; mimics ::SwapDir::optionObjectSizeParse() |
365 | bool | |
366 | Rock::SwapDir::parseRateOption(char const *option, const char *value, int isaReconfig) | |
367 | { | |
368 | int *storedRate; | |
369 | if (strcmp(option, "max-swap-rate") == 0) | |
370 | storedRate = &fileConfig.ioRate; | |
371 | else | |
372 | return false; | |
373 | ||
374 | if (!value) | |
375 | self_destruct(); | |
376 | ||
377 | // TODO: handle time units and detect parsing errors better | |
378 | const int64_t parsedValue = strtoll(value, NULL, 10); | |
379 | if (parsedValue < 0) { | |
380 | debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must not be negative but is: " << parsedValue); | |
381 | self_destruct(); | |
382 | } | |
383 | ||
384 | const int newRate = static_cast<int>(parsedValue); | |
385 | ||
386 | if (newRate < 0) { | |
387 | debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must not be negative but is: " << newRate); | |
388 | self_destruct(); | |
389 | } | |
390 | ||
7846d084 DK |
391 | if (!isaReconfig) |
392 | *storedRate = newRate; | |
393 | else if (*storedRate != newRate) { | |
394 | debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option | |
395 | << " cannot be changed dynamically, value left unchanged: " << | |
396 | *storedRate); | |
397 | } | |
df881a0f AR |
398 | |
399 | return true; | |
400 | } | |
401 | ||
402 | /// reports rate-specific options; mimics ::SwapDir::optionObjectSizeDump() | |
403 | void | |
404 | Rock::SwapDir::dumpRateOption(StoreEntry * e) const | |
405 | { | |
406 | if (fileConfig.ioRate >= 0) | |
407 | storeAppendPrintf(e, " max-swap-rate=%d", fileConfig.ioRate); | |
408 | } | |
409 | ||
e2851fe7 AR |
410 | /// check the results of the configuration; only level-0 debugging works here |
411 | void | |
412 | Rock::SwapDir::validateOptions() | |
413 | { | |
414 | if (max_objsize <= 0) | |
415 | fatal("Rock store requires a positive max-size"); | |
416 | ||
9dc492d0 DK |
417 | const int64_t maxSizeRoundingWaste = 1024 * 1024; // size is configured in MB |
418 | const int64_t maxObjectSizeRoundingWaste = maxObjectSize(); | |
419 | const int64_t maxRoundingWaste = | |
420 | max(maxSizeRoundingWaste, maxObjectSizeRoundingWaste); | |
421 | const int64_t usableDiskSize = diskOffset(entryLimitAllowed()); | |
422 | const int64_t diskWasteSize = maxSize() - usableDiskSize; | |
423 | Must(diskWasteSize >= 0); | |
e2851fe7 AR |
424 | |
425 | // warn if maximum db size is not reachable due to sfileno limit | |
9dc492d0 | 426 | if (entryLimitAllowed() == entryLimitHigh() && |
e29ccb57 | 427 | diskWasteSize >= maxRoundingWaste) { |
9dc492d0 DK |
428 | debugs(47, DBG_CRITICAL, "Rock store cache_dir[" << index << "] '" << path << "':"); |
429 | debugs(47, DBG_CRITICAL, "\tmaximum number of entries: " << entryLimitAllowed()); | |
430 | debugs(47, DBG_CRITICAL, "\tmaximum object size: " << maxObjectSize() << " Bytes"); | |
431 | debugs(47, DBG_CRITICAL, "\tmaximum db size: " << maxSize() << " Bytes"); | |
432 | debugs(47, DBG_CRITICAL, "\tusable db size: " << usableDiskSize << " Bytes"); | |
433 | debugs(47, DBG_CRITICAL, "\tdisk space waste: " << diskWasteSize << " Bytes"); | |
434 | debugs(47, DBG_CRITICAL, "WARNING: Rock store config wastes space."); | |
9199139f | 435 | } |
e2851fe7 AR |
436 | } |
437 | ||
438 | void | |
9199139f AR |
439 | Rock::SwapDir::rebuild() |
440 | { | |
078274f6 AR |
441 | //++StoreController::store_dirs_rebuilding; // see Rock::SwapDir::init() |
442 | AsyncJob::Start(new Rebuild(this)); | |
e2851fe7 AR |
443 | } |
444 | ||
445 | /* Add a new object to the cache with empty memory copy and pointer to disk | |
395a85b4 | 446 | * use to rebuild store from disk. Based on UFSSwapDir::addDiskRestore */ |
f1eaa254 | 447 | bool |
5b3ea321 | 448 | Rock::SwapDir::addEntry(const int filen, const DbCellHeader &header, const StoreEntry &from) |
e2851fe7 | 449 | { |
395a85b4 | 450 | debugs(47, 8, HERE << &from << ' ' << from.getMD5Text() << |
9199139f AR |
451 | ", filen="<< std::setfill('0') << std::hex << std::uppercase << |
452 | std::setw(8) << filen); | |
44704b50 | 453 | |
44c95fcf AR |
454 | sfileno newLocation = 0; |
455 | if (Ipc::StoreMapSlot *slot = map->openForWriting(reinterpret_cast<const cache_key *>(from.key), newLocation)) { | |
5b3ea321 | 456 | if (filen == newLocation) { |
44c95fcf | 457 | slot->set(from); |
5b3ea321 | 458 | map->extras(filen) = header; |
44c95fcf AR |
459 | } // else some other, newer entry got into our cell |
460 | map->closeForWriting(newLocation, false); | |
5b3ea321 | 461 | return filen == newLocation; |
44704b50 | 462 | } |
395a85b4 AR |
463 | |
464 | return false; | |
e2851fe7 AR |
465 | } |
466 | ||
c728b6f9 AR |
467 | bool |
468 | Rock::SwapDir::canStore(const StoreEntry &e, int64_t diskSpaceNeeded, int &load) const | |
e2851fe7 | 469 | { |
c728b6f9 AR |
470 | if (!::SwapDir::canStore(e, sizeof(DbCellHeader)+diskSpaceNeeded, load)) |
471 | return false; | |
e2851fe7 | 472 | |
c728b6f9 AR |
473 | if (!theFile || !theFile->canWrite()) |
474 | return false; | |
8abe1173 DK |
475 | |
476 | if (!map) | |
c728b6f9 | 477 | return false; |
8abe1173 | 478 | |
e0f3492c | 479 | // Do not start I/O transaction if there are less than 10% free pages left. |
551f8a18 | 480 | // TODO: reserve page instead |
7ef5aa64 | 481 | if (needsDiskStrand() && |
e29ccb57 | 482 | Ipc::Mem::PageLevel(Ipc::Mem::PageId::ioPage) >= 0.9 * Ipc::Mem::PageLimit(Ipc::Mem::PageId::ioPage)) { |
551f8a18 | 483 | debugs(47, 5, HERE << "too few shared pages for IPC I/O left"); |
e0bdae60 DK |
484 | return false; |
485 | } | |
486 | ||
e2851fe7 | 487 | if (io->shedLoad()) |
c728b6f9 | 488 | return false; |
e2851fe7 | 489 | |
c728b6f9 AR |
490 | load = io->load(); |
491 | return true; | |
e2851fe7 AR |
492 | } |
493 | ||
494 | StoreIOState::Pointer | |
495 | Rock::SwapDir::createStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data) | |
496 | { | |
497 | if (!theFile || theFile->error()) { | |
498 | debugs(47,4, HERE << theFile); | |
499 | return NULL; | |
500 | } | |
501 | ||
c728b6f9 AR |
502 | // compute payload size for our cell header, using StoreEntry info |
503 | // careful: e.objectLen() may still be negative here | |
504 | const int64_t expectedReplySize = e.mem_obj->expectedReplySize(); | |
505 | assert(expectedReplySize >= 0); // must know to prevent cell overflows | |
506 | assert(e.mem_obj->swap_hdr_sz > 0); | |
507 | DbCellHeader header; | |
508 | header.payloadSize = e.mem_obj->swap_hdr_sz + expectedReplySize; | |
509 | const int64_t payloadEnd = sizeof(DbCellHeader) + header.payloadSize; | |
510 | assert(payloadEnd <= max_objsize); | |
511 | ||
5b3ea321 | 512 | sfileno filen; |
44c95fcf | 513 | Ipc::StoreMapSlot *const slot = |
5b3ea321 | 514 | map->openForWriting(reinterpret_cast<const cache_key *>(e.key), filen); |
44c95fcf | 515 | if (!slot) { |
f5adb654 | 516 | debugs(47, 5, HERE << "map->add failed"); |
f1eaa254 | 517 | return NULL; |
e2851fe7 | 518 | } |
c728b6f9 | 519 | e.swap_file_sz = header.payloadSize; // and will be copied to the map |
44c95fcf | 520 | slot->set(e); |
5b3ea321 | 521 | map->extras(filen) = header; |
e2851fe7 | 522 | |
c728b6f9 | 523 | // XXX: We rely on our caller, storeSwapOutStart(), to set e.fileno. |
dd7ac58b AR |
524 | // If that does not happen, the entry will not decrement the read level! |
525 | ||
e2851fe7 AR |
526 | IoState *sio = new IoState(this, &e, cbFile, cbIo, data); |
527 | ||
528 | sio->swap_dirn = index; | |
5b3ea321 | 529 | sio->swap_filen = filen; |
c728b6f9 AR |
530 | sio->payloadEnd = payloadEnd; |
531 | sio->diskOffset = diskOffset(sio->swap_filen); | |
e2851fe7 | 532 | |
5b3ea321 | 533 | debugs(47,5, HERE << "dir " << index << " created new filen " << |
9199139f AR |
534 | std::setfill('0') << std::hex << std::uppercase << std::setw(8) << |
535 | sio->swap_filen << std::dec << " at " << sio->diskOffset); | |
e2851fe7 | 536 | |
c728b6f9 | 537 | assert(sio->diskOffset + payloadEnd <= diskOffsetLimit()); |
e2851fe7 AR |
538 | |
539 | sio->file(theFile); | |
540 | ||
541 | trackReferences(e); | |
542 | return sio; | |
543 | } | |
544 | ||
545 | int64_t | |
546 | Rock::SwapDir::diskOffset(int filen) const | |
547 | { | |
c728b6f9 | 548 | assert(filen >= 0); |
e2851fe7 AR |
549 | return HeaderSize + max_objsize*filen; |
550 | } | |
551 | ||
552 | int64_t | |
553 | Rock::SwapDir::diskOffsetLimit() const | |
554 | { | |
c728b6f9 | 555 | assert(map); |
f1eaa254 | 556 | return diskOffset(map->entryLimit()); |
e2851fe7 AR |
557 | } |
558 | ||
c728b6f9 | 559 | // tries to open an old or being-written-to entry with swap_filen for reading |
e2851fe7 AR |
560 | StoreIOState::Pointer |
561 | Rock::SwapDir::openStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data) | |
562 | { | |
563 | if (!theFile || theFile->error()) { | |
564 | debugs(47,4, HERE << theFile); | |
565 | return NULL; | |
566 | } | |
567 | ||
9199139f | 568 | if (e.swap_filen < 0) { |
1adea2a6 | 569 | debugs(47,4, HERE << e); |
f1eaa254 DK |
570 | return NULL; |
571 | } | |
572 | ||
e0f3492c | 573 | // Do not start I/O transaction if there are less than 10% free pages left. |
551f8a18 | 574 | // TODO: reserve page instead |
7ef5aa64 | 575 | if (needsDiskStrand() && |
e29ccb57 | 576 | Ipc::Mem::PageLevel(Ipc::Mem::PageId::ioPage) >= 0.9 * Ipc::Mem::PageLimit(Ipc::Mem::PageId::ioPage)) { |
551f8a18 | 577 | debugs(47, 5, HERE << "too few shared pages for IPC I/O left"); |
e0bdae60 DK |
578 | return NULL; |
579 | } | |
580 | ||
c728b6f9 AR |
581 | // The are two ways an entry can get swap_filen: our get() locked it for |
582 | // reading or our storeSwapOutStart() locked it for writing. Peeking at our | |
583 | // locked entry is safe, but no support for reading a filling entry. | |
44c95fcf AR |
584 | const Ipc::StoreMapSlot *slot = map->peekAtReader(e.swap_filen); |
585 | if (!slot) | |
c728b6f9 | 586 | return NULL; // we were writing afterall |
1adea2a6 | 587 | |
e2851fe7 AR |
588 | IoState *sio = new IoState(this, &e, cbFile, cbIo, data); |
589 | ||
590 | sio->swap_dirn = index; | |
591 | sio->swap_filen = e.swap_filen; | |
68353d5a | 592 | sio->payloadEnd = sizeof(DbCellHeader) + map->extras(e.swap_filen).payloadSize; |
c728b6f9 AR |
593 | assert(sio->payloadEnd <= max_objsize); // the payload fits the slot |
594 | ||
5b3ea321 | 595 | debugs(47,5, HERE << "dir " << index << " has old filen: " << |
9199139f AR |
596 | std::setfill('0') << std::hex << std::uppercase << std::setw(8) << |
597 | sio->swap_filen); | |
e2851fe7 | 598 | |
44c95fcf AR |
599 | assert(slot->basics.swap_file_sz > 0); |
600 | assert(slot->basics.swap_file_sz == e.swap_file_sz); | |
e2851fe7 | 601 | |
c728b6f9 AR |
602 | sio->diskOffset = diskOffset(sio->swap_filen); |
603 | assert(sio->diskOffset + sio->payloadEnd <= diskOffsetLimit()); | |
e2851fe7 AR |
604 | |
605 | sio->file(theFile); | |
606 | return sio; | |
607 | } | |
608 | ||
609 | void | |
610 | Rock::SwapDir::ioCompletedNotification() | |
611 | { | |
51618c6a | 612 | if (!theFile) |
e2851fe7 | 613 | fatalf("Rock cache_dir failed to initialize db file: %s", filePath); |
e2851fe7 | 614 | |
51618c6a AR |
615 | if (theFile->error()) |
616 | fatalf("Rock cache_dir at %s failed to open db file: %s", filePath, | |
617 | xstrerror()); | |
e2851fe7 | 618 | |
51618c6a | 619 | debugs(47, 2, "Rock cache_dir[" << index << "] limits: " << |
9199139f AR |
620 | std::setw(12) << maxSize() << " disk bytes and " << |
621 | std::setw(7) << map->entryLimit() << " entries"); | |
9cfba26c AR |
622 | |
623 | rebuild(); | |
e2851fe7 AR |
624 | } |
625 | ||
626 | void | |
627 | Rock::SwapDir::closeCompleted() | |
628 | { | |
629 | theFile = NULL; | |
630 | } | |
631 | ||
632 | void | |
633 | Rock::SwapDir::readCompleted(const char *buf, int rlen, int errflag, RefCount< ::ReadRequest> r) | |
634 | { | |
635 | ReadRequest *request = dynamic_cast<Rock::ReadRequest*>(r.getRaw()); | |
636 | assert(request); | |
637 | IoState::Pointer sio = request->sio; | |
638 | ||
c728b6f9 AR |
639 | if (errflag == DISK_OK && rlen > 0) |
640 | sio->offset_ += rlen; | |
641 | assert(sio->diskOffset + sio->offset_ <= diskOffsetLimit()); // post-factum | |
e2851fe7 AR |
642 | |
643 | StoreIOState::STRCB *callback = sio->read.callback; | |
644 | assert(callback); | |
645 | sio->read.callback = NULL; | |
646 | void *cbdata; | |
647 | if (cbdataReferenceValidDone(sio->read.callback_data, &cbdata)) | |
648 | callback(cbdata, r->buf, rlen, sio.getRaw()); | |
649 | } | |
650 | ||
651 | void | |
652 | Rock::SwapDir::writeCompleted(int errflag, size_t rlen, RefCount< ::WriteRequest> r) | |
653 | { | |
654 | Rock::WriteRequest *request = dynamic_cast<Rock::WriteRequest*>(r.getRaw()); | |
655 | assert(request); | |
656 | assert(request->sio != NULL); | |
657 | IoState &sio = *request->sio; | |
1adea2a6 | 658 | |
f58bb2f4 AR |
659 | if (errflag == DISK_OK) { |
660 | // close, assuming we only write once; the entry gets the read lock | |
44c95fcf | 661 | map->closeForWriting(sio.swap_filen, true); |
c728b6f9 | 662 | // do not increment sio.offset_ because we do it in sio->write() |
f58bb2f4 AR |
663 | } else { |
664 | // Do not abortWriting here. The entry should keep the write lock | |
665 | // instead of losing association with the store and confusing core. | |
666 | map->free(sio.swap_filen); // will mark as unusable, just in case | |
667 | } | |
1adea2a6 | 668 | |
c728b6f9 | 669 | assert(sio.diskOffset + sio.offset_ <= diskOffsetLimit()); // post-factum |
e2851fe7 AR |
670 | |
671 | sio.finishedWriting(errflag); | |
672 | } | |
673 | ||
674 | bool | |
675 | Rock::SwapDir::full() const | |
676 | { | |
c728b6f9 | 677 | return map && map->full(); |
e2851fe7 AR |
678 | } |
679 | ||
e2851fe7 AR |
680 | // storeSwapOutFileClosed calls this nethod on DISK_NO_SPACE_LEFT, |
681 | // but it should not happen for us | |
682 | void | |
9199139f AR |
683 | Rock::SwapDir::diskFull() |
684 | { | |
f5adb654 AR |
685 | debugs(20, DBG_IMPORTANT, "BUG: No space left with rock cache_dir: " << |
686 | filePath); | |
e2851fe7 AR |
687 | } |
688 | ||
689 | /// purge while full(); it should be sufficient to purge just one | |
690 | void | |
691 | Rock::SwapDir::maintain() | |
692 | { | |
9199139f AR |
693 | debugs(47,3, HERE << "cache_dir[" << index << "] guards: " << |
694 | !repl << !map << !full() << StoreController::store_dirs_rebuilding); | |
e2851fe7 AR |
695 | |
696 | if (!repl) | |
697 | return; // no means (cannot find a victim) | |
698 | ||
f428c9c4 AR |
699 | if (!map) |
700 | return; // no victims (yet) | |
701 | ||
e2851fe7 AR |
702 | if (!full()) |
703 | return; // no need (to find a victim) | |
704 | ||
f428c9c4 AR |
705 | // XXX: UFSSwapDir::maintain says we must quit during rebuild |
706 | if (StoreController::store_dirs_rebuilding) | |
707 | return; | |
708 | ||
39c1e1d9 | 709 | debugs(47,3, HERE << "cache_dir[" << index << "] state: " << map->full() << |
57f583f1 | 710 | ' ' << currentSize() << " < " << diskOffsetLimit()); |
e2851fe7 AR |
711 | |
712 | // Hopefully, we find a removable entry much sooner (TODO: use time?) | |
713 | const int maxProbed = 10000; | |
714 | RemovalPurgeWalker *walker = repl->PurgeInit(repl, maxProbed); | |
715 | ||
716 | // It really should not take that long, but this will stop "infinite" loops | |
717 | const int maxFreed = 1000; | |
718 | int freed = 0; | |
719 | // TODO: should we purge more than needed to minimize overheads? | |
720 | for (; freed < maxFreed && full(); ++freed) { | |
721 | if (StoreEntry *e = walker->Next(walker)) | |
722 | e->release(); // will call our unlink() method | |
9199139f | 723 | else |
e2851fe7 | 724 | break; // no more objects |
9199139f | 725 | } |
e2851fe7 AR |
726 | |
727 | debugs(47,2, HERE << "Rock cache_dir[" << index << "] freed " << freed << | |
9199139f | 728 | " scanned " << walker->scanned << '/' << walker->locked); |
e2851fe7 AR |
729 | |
730 | walker->Done(walker); | |
731 | ||
732 | if (full()) { | |
f5adb654 | 733 | debugs(47, DBG_CRITICAL, "ERROR: Rock cache_dir[" << index << "] " << |
9199139f AR |
734 | "is still full after freeing " << freed << " entries. A bug?"); |
735 | } | |
e2851fe7 AR |
736 | } |
737 | ||
738 | void | |
739 | Rock::SwapDir::reference(StoreEntry &e) | |
740 | { | |
741 | debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen); | |
0e240235 | 742 | if (repl && repl->Referenced) |
e2851fe7 AR |
743 | repl->Referenced(repl, &e, &e.repl); |
744 | } | |
745 | ||
4c973beb | 746 | bool |
e2851fe7 AR |
747 | Rock::SwapDir::dereference(StoreEntry &e) |
748 | { | |
749 | debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen); | |
0e240235 | 750 | if (repl && repl->Dereferenced) |
e2851fe7 | 751 | repl->Dereferenced(repl, &e, &e.repl); |
4c973beb AR |
752 | |
753 | // no need to keep e in the global store_table for us; we have our own map | |
754 | return false; | |
e2851fe7 AR |
755 | } |
756 | ||
c521ad17 DK |
757 | bool |
758 | Rock::SwapDir::unlinkdUseful() const | |
759 | { | |
760 | // no entry-specific files to unlink | |
761 | return false; | |
762 | } | |
763 | ||
e2851fe7 AR |
764 | void |
765 | Rock::SwapDir::unlink(StoreEntry &e) | |
766 | { | |
f58bb2f4 | 767 | debugs(47, 5, HERE << e); |
e2851fe7 | 768 | ignoreReferences(e); |
f1eaa254 | 769 | map->free(e.swap_filen); |
f58bb2f4 | 770 | disconnect(e); |
e2851fe7 AR |
771 | } |
772 | ||
773 | void | |
774 | Rock::SwapDir::trackReferences(StoreEntry &e) | |
775 | { | |
f58bb2f4 | 776 | debugs(47, 5, HERE << e); |
0e240235 AR |
777 | if (repl) |
778 | repl->Add(repl, &e, &e.repl); | |
e2851fe7 AR |
779 | } |
780 | ||
e2851fe7 AR |
781 | void |
782 | Rock::SwapDir::ignoreReferences(StoreEntry &e) | |
783 | { | |
f58bb2f4 | 784 | debugs(47, 5, HERE << e); |
0e240235 AR |
785 | if (repl) |
786 | repl->Remove(repl, &e, &e.repl); | |
e2851fe7 AR |
787 | } |
788 | ||
789 | void | |
790 | Rock::SwapDir::statfs(StoreEntry &e) const | |
791 | { | |
792 | storeAppendPrintf(&e, "\n"); | |
c91ca3ce | 793 | storeAppendPrintf(&e, "Maximum Size: %" PRIu64 " KB\n", maxSize() >> 10); |
57f583f1 | 794 | storeAppendPrintf(&e, "Current Size: %.2f KB %.2f%%\n", |
cc34568d DK |
795 | currentSize() / 1024.0, |
796 | Math::doublePercent(currentSize(), maxSize())); | |
e2851fe7 | 797 | |
714a769f AR |
798 | if (map) { |
799 | const int limit = map->entryLimit(); | |
800 | storeAppendPrintf(&e, "Maximum entries: %9d\n", limit); | |
c728b6f9 AR |
801 | if (limit > 0) { |
802 | const int entryCount = map->entryCount(); | |
714a769f | 803 | storeAppendPrintf(&e, "Current entries: %9d %.2f%%\n", |
9199139f | 804 | entryCount, (100.0 * entryCount / limit)); |
c728b6f9 AR |
805 | |
806 | if (limit < 100) { // XXX: otherwise too expensive to count | |
44c95fcf | 807 | Ipc::ReadWriteLockStats stats; |
c728b6f9 AR |
808 | map->updateStats(stats); |
809 | stats.dump(e); | |
810 | } | |
811 | } | |
9199139f | 812 | } |
e2851fe7 AR |
813 | |
814 | storeAppendPrintf(&e, "Pending operations: %d out of %d\n", | |
9199139f | 815 | store_open_disk_fd, Config.max_open_disk_fds); |
e2851fe7 AR |
816 | |
817 | storeAppendPrintf(&e, "Flags:"); | |
818 | ||
819 | if (flags.selected) | |
820 | storeAppendPrintf(&e, " SELECTED"); | |
821 | ||
822 | if (flags.read_only) | |
823 | storeAppendPrintf(&e, " READ-ONLY"); | |
824 | ||
825 | storeAppendPrintf(&e, "\n"); | |
826 | ||
827 | } | |
902df398 | 828 | |
9bb01611 | 829 | namespace Rock |
902df398 | 830 | { |
a46ed03b | 831 | RunnerRegistrationEntry(rrAfterConfig, SwapDirRr); |
9bb01611 | 832 | } |
902df398 | 833 | |
9bb01611 | 834 | void Rock::SwapDirRr::create(const RunnerRegistry &) |
902df398 | 835 | { |
4404f1c5 DK |
836 | Must(owners.empty()); |
837 | for (int i = 0; i < Config.cacheSwap.n_configured; ++i) { | |
838 | if (const Rock::SwapDir *const sd = dynamic_cast<Rock::SwapDir *>(INDEXSD(i))) { | |
4404f1c5 DK |
839 | Rock::SwapDir::DirMap::Owner *const owner = |
840 | Rock::SwapDir::DirMap::Init(sd->path, sd->entryLimitAllowed()); | |
841 | owners.push_back(owner); | |
902df398 DK |
842 | } |
843 | } | |
844 | } | |
845 | ||
9bb01611 | 846 | Rock::SwapDirRr::~SwapDirRr() |
902df398 DK |
847 | { |
848 | for (size_t i = 0; i < owners.size(); ++i) | |
849 | delete owners[i]; | |
850 | } |