]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/ufs/RebuildState.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / fs / ufs / RebuildState.cc
1 /*
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 47 Store Directory Routines */
10
11 #include "squid.h"
12 #include "disk.h"
13 #include "globals.h"
14 #include "RebuildState.h"
15 #include "SquidConfig.h"
16 #include "SquidTime.h"
17 #include "store_key_md5.h"
18 #include "store_rebuild.h"
19 #include "StoreSwapLogData.h"
20 #include "tools.h"
21 #include "UFSSwapLogParser.h"
22
23 #include <cerrno>
24 #include <cmath>
25 #if HAVE_SYS_STAT_H
26 #include <sys/stat.h>
27 #endif
28
29 CBDATA_NAMESPACED_CLASS_INIT(Fs::Ufs,RebuildState);
30
31 Fs::Ufs::RebuildState::RebuildState(RefCount<UFSSwapDir> aSwapDir) :
32 sd (aSwapDir), LogParser(NULL), e(NULL), fromLog(true), _done (false)
33 {
34 /*
35 * If the swap.state file exists in the cache_dir, then
36 * we'll use commonUfsDirRebuildFromSwapLog(), otherwise we'll
37 * use commonUfsDirRebuildFromDirectory() to open up each file
38 * and suck in the meta data.
39 */
40 int clean = 0; //TODO: change to bool
41 int zeroLengthLog = 0;
42 FILE *fp = sd->openTmpSwapLog(&clean, &zeroLengthLog);
43
44 if (fp && !zeroLengthLog)
45 LogParser = Fs::Ufs::UFSSwapLogParser::GetUFSSwapLogParser(fp);
46
47 if (LogParser == NULL ) {
48 fromLog = false;
49
50 if (fp != NULL)
51 fclose(fp);
52
53 } else {
54 fromLog = true;
55 flags.clean = (clean != 0);
56 }
57
58 if (!clean)
59 flags.need_to_validate = true;
60
61 debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path << " (" <<
62 (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << ")");
63 }
64
65 Fs::Ufs::RebuildState::~RebuildState()
66 {
67 sd->closeTmpSwapLog();
68
69 if (LogParser)
70 delete LogParser;
71 }
72
73 void
74 Fs::Ufs::RebuildState::RebuildStep(void *data)
75 {
76 RebuildState *rb = (RebuildState *)data;
77 rb->rebuildStep();
78
79 if (!rb->isDone())
80 eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1);
81 else {
82 -- StoreController::store_dirs_rebuilding;
83 storeRebuildComplete(&rb->counts);
84 delete rb;
85 }
86 }
87
88 /// load entries from swap.state or files until we run out of entries or time
89 void
90 Fs::Ufs::RebuildState::rebuildStep()
91 {
92 currentEntry(NULL);
93
94 // Balance our desire to maximize the number of entries processed at once
95 // (and, hence, minimize overheads and total rebuild time) with a
96 // requirement to also process Coordinator events, disk I/Os, etc.
97 const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
98 const timeval loopStart = current_time;
99
100 const int totalEntries = LogParser ? LogParser->SwapLogEntries() : -1;
101
102 while (!isDone()) {
103 if (fromLog)
104 rebuildFromSwapLog();
105 else
106 rebuildFromDirectory();
107
108 // TODO: teach storeRebuildProgress to handle totalEntries <= 0
109 if (totalEntries > 0 && (n_read % 4000 == 0))
110 storeRebuildProgress(sd->index, totalEntries, n_read);
111
112 if (opt_foreground_rebuild)
113 continue; // skip "few entries at a time" check below
114
115 getCurrentTime();
116 const double elapsedMsec = tvSubMsec(loopStart, current_time);
117 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
118 debugs(47, 5, HERE << "pausing after " << n_read << " entries in " <<
119 elapsedMsec << "ms; " << (elapsedMsec/n_read) << "ms per entry");
120 break;
121 }
122 }
123 }
124
125 /// process one cache file
126 void
127 Fs::Ufs::RebuildState::rebuildFromDirectory()
128 {
129 cache_key key[SQUID_MD5_DIGEST_LENGTH];
130
131 struct stat sb;
132 int fd = -1;
133 debugs(47, 3, HERE << "DIR #" << sd->index);
134
135 assert(fd == -1);
136 sfileno filn = 0;
137 int size;
138 fd = getNextFile(&filn, &size);
139
140 if (fd == -2) {
141 debugs(47, DBG_IMPORTANT, "Done scanning " << sd->path << " dir (" <<
142 n_read << " entries)");
143 _done = true;
144 return;
145 } else if (fd < 0) {
146 return;
147 }
148
149 assert(fd > -1);
150 /* lets get file stats here */
151
152 ++n_read;
153
154 if (fstat(fd, &sb) < 0) {
155 debugs(47, DBG_IMPORTANT, HERE << "fstat(FD " << fd << "): " << xstrerror());
156 file_close(fd);
157 --store_open_disk_fd;
158 fd = -1;
159 return;
160 }
161
162 MemBuf buf;
163 buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);
164 if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
165 return;
166
167 const uint64_t expectedSize = sb.st_size > 0 ?
168 static_cast<uint64_t>(sb.st_size) : 0;
169
170 StoreEntry tmpe;
171 const bool parsed = storeRebuildParseEntry(buf, tmpe, key, counts,
172 expectedSize);
173
174 file_close(fd);
175 --store_open_disk_fd;
176 fd = -1;
177
178 bool accepted = parsed && tmpe.swap_file_sz > 0;
179 if (parsed && !accepted) {
180 debugs(47, DBG_IMPORTANT, "WARNING: Ignoring ufs cache entry with " <<
181 "unknown size: " << tmpe);
182 accepted = false;
183 }
184
185 if (!accepted) {
186 // XXX: shouldn't this be a call to commonUfsUnlink?
187 sd->unlinkFile(filn); // should we unlink in all failure cases?
188 return;
189 }
190
191 if (!storeRebuildKeepEntry(tmpe, key, counts))
192 return;
193
194 ++counts.objcount;
195 // tmpe.dump(5);
196 currentEntry(sd->addDiskRestore(key,
197 filn,
198 tmpe.swap_file_sz,
199 tmpe.expires,
200 tmpe.timestamp,
201 tmpe.lastref,
202 tmpe.lastmod,
203 tmpe.refcount, /* refcount */
204 tmpe.flags, /* flags */
205 (int) flags.clean));
206 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
207 }
208
209 StoreEntry *
210 Fs::Ufs::RebuildState::currentEntry() const
211 {
212 return e;
213 }
214
215 void
216 Fs::Ufs::RebuildState::currentEntry(StoreEntry *newValue)
217 {
218 e = newValue;
219 }
220
221 /// process one swap log entry
222 void
223 Fs::Ufs::RebuildState::rebuildFromSwapLog()
224 {
225 StoreSwapLogData swapData;
226
227 if (LogParser->ReadRecord(swapData) != 1) {
228 debugs(47, DBG_IMPORTANT, "Done reading " << sd->path << " swaplog (" << n_read << " entries)");
229 LogParser->Close();
230 delete LogParser;
231 LogParser = NULL;
232 _done = true;
233 return;
234 }
235
236 ++n_read;
237
238 if (!swapData.sane()) {
239 ++counts.invalid;
240 return;
241 }
242
243 /*
244 * BC: during 2.4 development, we changed the way swap file
245 * numbers are assigned and stored. The high 16 bits used
246 * to encode the SD index number. There used to be a call
247 * to storeDirProperFileno here that re-assigned the index
248 * bits. Now, for backwards compatibility, we just need
249 * to mask it off.
250 */
251 swapData.swap_filen &= 0x00FFFFFF;
252
253 debugs(47, 3, HERE << swap_log_op_str[(int) swapData.op] << " " <<
254 storeKeyText(swapData.key) << " "<< std::setfill('0') <<
255 std::hex << std::uppercase << std::setw(8) <<
256 swapData.swap_filen);
257
258 if (swapData.op == SWAP_LOG_ADD) {
259 (void) 0;
260 } else if (swapData.op == SWAP_LOG_DEL) {
261 /* Delete unless we already have a newer copy anywhere in any store */
262 /* this needs to become
263 * 1) unpack url
264 * 2) make synthetic request with headers ?? or otherwise search
265 * for a matching object in the store
266 * TODO FIXME change to new async api
267 */
268 currentEntry (Store::Root().get(swapData.key));
269
270 if (currentEntry() != NULL && swapData.lastref >= e->lastref) {
271 undoAdd();
272 --counts.objcount;
273 ++counts.cancelcount;
274 }
275 return;
276 } else {
277 const double
278 x = ::log(static_cast<double>(++counts.bad_log_op)) / ::log(10.0);
279
280 if (0.0 == x - (double) (int) x)
281 debugs(47, DBG_IMPORTANT, "WARNING: " << counts.bad_log_op << " invalid swap log entries found");
282
283 ++counts.invalid;
284
285 return;
286 }
287
288 ++counts.scancount; // XXX: should not this be incremented earlier?
289
290 if (!sd->validFileno(swapData.swap_filen, 0)) {
291 ++counts.invalid;
292 return;
293 }
294
295 if (EBIT_TEST(swapData.flags, KEY_PRIVATE)) {
296 ++counts.badflags;
297 return;
298 }
299
300 /* this needs to become
301 * 1) unpack url
302 * 2) make synthetic request with headers ?? or otherwise search
303 * for a matching object in the store
304 * TODO FIXME change to new async api
305 */
306 currentEntry (Store::Root().get(swapData.key));
307
308 int used; /* is swapfile already in use? */
309
310 used = sd->mapBitTest(swapData.swap_filen);
311
312 /* If this URL already exists in the cache, does the swap log
313 * appear to have a newer entry? Compare 'lastref' from the
314 * swap log to e->lastref. */
315 /* is the log entry newer than current entry? */
316 int disk_entry_newer = currentEntry() ? (swapData.lastref > currentEntry()->lastref ? 1 : 0) : 0;
317
318 if (used && !disk_entry_newer) {
319 /* log entry is old, ignore it */
320 ++counts.clashcount;
321 return;
322 } else if (used && currentEntry() && currentEntry()->swap_filen == swapData.swap_filen && currentEntry()->swap_dirn == sd->index) {
323 /* swapfile taken, same URL, newer, update meta */
324
325 if (currentEntry()->store_status == STORE_OK) {
326 currentEntry()->lastref = swapData.timestamp;
327 currentEntry()->timestamp = swapData.timestamp;
328 currentEntry()->expires = swapData.expires;
329 currentEntry()->lastmod = swapData.lastmod;
330 currentEntry()->flags = swapData.flags;
331 currentEntry()->refcount += swapData.refcount;
332 sd->dereference(*currentEntry(), false);
333 } else {
334 debug_trap("commonUfsDirRebuildFromSwapLog: bad condition");
335 debugs(47, DBG_IMPORTANT, HERE << "bad condition");
336 }
337 return;
338 } else if (used) {
339 /* swapfile in use, not by this URL, log entry is newer */
340 /* This is sorta bad: the log entry should NOT be newer at this
341 * point. If the log is dirty, the filesize check should have
342 * caught this. If the log is clean, there should never be a
343 * newer entry. */
344 debugs(47, DBG_IMPORTANT, "WARNING: newer swaplog entry for dirno " <<
345 sd->index << ", fileno "<< std::setfill('0') << std::hex <<
346 std::uppercase << std::setw(8) << swapData.swap_filen);
347
348 /* I'm tempted to remove the swapfile here just to be safe,
349 * but there is a bad race condition in the NOVM version if
350 * the swapfile has recently been opened for writing, but
351 * not yet opened for reading. Because we can't map
352 * swapfiles back to StoreEntrys, we don't know the state
353 * of the entry using that file. */
354 /* We'll assume the existing entry is valid, probably because
355 * were in a slow rebuild and the the swap file number got taken
356 * and the validation procedure hasn't run. */
357 assert(flags.need_to_validate);
358 ++counts.clashcount;
359 return;
360 } else if (currentEntry() && !disk_entry_newer) {
361 /* key already exists, current entry is newer */
362 /* keep old, ignore new */
363 ++counts.dupcount;
364 return;
365 } else if (currentEntry()) {
366 /* key already exists, this swapfile not being used */
367 /* junk old, load new */
368 undoAdd();
369 --counts.objcount;
370 ++counts.dupcount;
371 } else {
372 /* URL doesnt exist, swapfile not in use */
373 /* load new */
374 (void) 0;
375 }
376
377 ++counts.objcount;
378
379 currentEntry(sd->addDiskRestore(swapData.key,
380 swapData.swap_filen,
381 swapData.swap_file_sz,
382 swapData.expires,
383 swapData.timestamp,
384 swapData.lastref,
385 swapData.lastmod,
386 swapData.refcount,
387 swapData.flags,
388 (int) flags.clean));
389
390 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
391 }
392
393 /// undo the effects of adding an entry in rebuildFromSwapLog()
394 void
395 Fs::Ufs::RebuildState::undoAdd()
396 {
397 StoreEntry *added = currentEntry();
398 assert(added);
399 currentEntry(NULL);
400
401 // TODO: Why bother with these two if we are going to release?!
402 added->expireNow();
403 added->releaseRequest();
404
405 if (added->swap_filen > -1) {
406 SwapDir *someDir = INDEXSD(added->swap_dirn);
407 assert(someDir);
408 if (UFSSwapDir *ufsDir = dynamic_cast<UFSSwapDir*>(someDir))
409 ufsDir->undoAddDiskRestore(added);
410 // else the entry was loaded from and/or is currently in a non-UFS dir
411 // Thus, there is no use in preserving its disk file (the only purpose
412 // of undoAddDiskRestore!), even if we could. Instead, we release the
413 // the entry and [eventually] unlink its disk file or free its slot.
414 }
415
416 added->release();
417 }
418
419 int
420 Fs::Ufs::RebuildState::getNextFile(sfileno * filn_p, int *)
421 {
422 int fd = -1;
423 int dirs_opened = 0;
424 debugs(47, 3, HERE << "flag=" << flags.init << ", " <<
425 sd->index << ": /"<< std::setfill('0') << std::hex <<
426 std::uppercase << std::setw(2) << curlvl1 << "/" << std::setw(2) <<
427 curlvl2);
428
429 if (done)
430 return -2;
431
432 while (fd < 0 && done == 0) {
433 fd = -1;
434
435 if (!flags.init) { /* initialize, open first file */
436 done = 0;
437 curlvl1 = 0;
438 curlvl2 = 0;
439 in_dir = 0;
440 flags.init = true;
441 assert(Config.cacheSwap.n_configured > 0);
442 }
443
444 if (0 == in_dir) { /* we need to read in a new directory */
445 snprintf(fullpath, MAXPATHLEN, "%s/%02X/%02X",
446 sd->path,
447 curlvl1, curlvl2);
448
449 if (dirs_opened)
450 return -1;
451
452 td = opendir(fullpath);
453
454 ++dirs_opened;
455
456 if (td == NULL) {
457 debugs(47, DBG_IMPORTANT, HERE << "error in opendir (" << fullpath << "): " << xstrerror());
458 } else {
459 entry = readdir(td); /* skip . and .. */
460 entry = readdir(td);
461
462 if (entry == NULL && errno == ENOENT)
463 debugs(47, DBG_IMPORTANT, HERE << "WARNING: directory does not exist!");
464 debugs(47, 3, HERE << "Directory " << fullpath);
465 }
466 }
467
468 if (td != NULL && (entry = readdir(td)) != NULL) {
469 ++in_dir;
470
471 if (sscanf(entry->d_name, "%x", &fn) != 1) {
472 debugs(47, 3, HERE << "invalid entry " << entry->d_name);
473 continue;
474 }
475
476 if (!UFSSwapDir::FilenoBelongsHere(fn, sd->index, curlvl1, curlvl2)) {
477 debugs(47, 3, HERE << std::setfill('0') <<
478 std::hex << std::uppercase << std::setw(8) << fn <<
479 " does not belong in " << std::dec << sd->index << "/" <<
480 curlvl1 << "/" << curlvl2);
481
482 continue;
483 }
484
485 if (sd->mapBitTest(fn)) {
486 debugs(47, 3, HERE << "Locked, continuing with next.");
487 continue;
488 }
489
490 snprintf(fullfilename, MAXPATHLEN, "%s/%s",
491 fullpath, entry->d_name);
492 debugs(47, 3, HERE << "Opening " << fullfilename);
493 fd = file_open(fullfilename, O_RDONLY | O_BINARY);
494
495 if (fd < 0)
496 debugs(47, DBG_IMPORTANT, HERE << "error opening " << fullfilename << ": " << xstrerror());
497 else
498 ++store_open_disk_fd;
499
500 continue;
501 }
502
503 if (td != NULL)
504 closedir(td);
505
506 td = NULL;
507
508 in_dir = 0;
509
510 if (sd->validL2(++curlvl2))
511 continue;
512
513 curlvl2 = 0;
514
515 if (sd->validL1(++curlvl1))
516 continue;
517
518 curlvl1 = 0;
519
520 done = 1;
521 }
522
523 *filn_p = fn;
524 return fd;
525 }
526
527 bool
528 Fs::Ufs::RebuildState::error() const
529 {
530 return false;
531 }
532
533 bool
534 Fs::Ufs::RebuildState::isDone() const
535 {
536 return _done;
537 }
538
539 StoreEntry *
540 Fs::Ufs::RebuildState::currentItem()
541 {
542 return currentEntry();
543 }
544