]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/ufs/RebuildState.cc
merge from trunk r14444
[thirdparty/squid.git] / src / fs / ufs / RebuildState.cc
1 /*
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 47 Store Directory Routines */
10
11 #include "squid.h"
12 #include "fs_io.h"
13 #include "globals.h"
14 #include "RebuildState.h"
15 #include "SquidConfig.h"
16 #include "SquidTime.h"
17 #include "store/Disks.h"
18 #include "store_key_md5.h"
19 #include "store_rebuild.h"
20 #include "StoreSwapLogData.h"
21 #include "tools.h"
22 #include "UFSSwapLogParser.h"
23
24 #include <cerrno>
25 #include <cmath>
26 #if HAVE_SYS_STAT_H
27 #include <sys/stat.h>
28 #endif
29
30 CBDATA_NAMESPACED_CLASS_INIT(Fs::Ufs,RebuildState);
31
32 Fs::Ufs::RebuildState::RebuildState(RefCount<UFSSwapDir> aSwapDir) :
33 sd(aSwapDir),
34 n_read(0),
35 LogParser(NULL),
36 curlvl1(0),
37 curlvl2(0),
38 in_dir(0),
39 done(0),
40 fn(0),
41 entry(NULL),
42 td(NULL),
43 e(NULL),
44 fromLog(true),
45 _done(false),
46 cbdata(NULL)
47 {
48 *fullpath = 0;
49 *fullfilename = 0;
50
51 /*
52 * If the swap.state file exists in the cache_dir, then
53 * we'll use commonUfsDirRebuildFromSwapLog(), otherwise we'll
54 * use commonUfsDirRebuildFromDirectory() to open up each file
55 * and suck in the meta data.
56 */
57 int clean = 0; //TODO: change to bool
58 int zeroLengthLog = 0;
59 FILE *fp = sd->openTmpSwapLog(&clean, &zeroLengthLog);
60
61 if (fp && !zeroLengthLog)
62 LogParser = Fs::Ufs::UFSSwapLogParser::GetUFSSwapLogParser(fp);
63
64 if (LogParser == NULL ) {
65 fromLog = false;
66
67 if (fp != NULL)
68 fclose(fp);
69
70 } else {
71 fromLog = true;
72 flags.clean = (clean != 0);
73 }
74
75 if (!clean)
76 flags.need_to_validate = true;
77
78 debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path << " (" <<
79 (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << ")");
80 }
81
82 Fs::Ufs::RebuildState::~RebuildState()
83 {
84 sd->closeTmpSwapLog();
85
86 if (LogParser)
87 delete LogParser;
88 }
89
90 void
91 Fs::Ufs::RebuildState::RebuildStep(void *data)
92 {
93 RebuildState *rb = (RebuildState *)data;
94 rb->rebuildStep();
95
96 if (!rb->isDone())
97 eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1);
98 else {
99 -- StoreController::store_dirs_rebuilding;
100 storeRebuildComplete(&rb->counts);
101 delete rb;
102 }
103 }
104
105 /// load entries from swap.state or files until we run out of entries or time
106 void
107 Fs::Ufs::RebuildState::rebuildStep()
108 {
109 currentEntry(NULL);
110
111 // Balance our desire to maximize the number of entries processed at once
112 // (and, hence, minimize overheads and total rebuild time) with a
113 // requirement to also process Coordinator events, disk I/Os, etc.
114 const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
115 const timeval loopStart = current_time;
116
117 const int totalEntries = LogParser ? LogParser->SwapLogEntries() : -1;
118
119 while (!isDone()) {
120 if (fromLog)
121 rebuildFromSwapLog();
122 else
123 rebuildFromDirectory();
124
125 // TODO: teach storeRebuildProgress to handle totalEntries <= 0
126 if (totalEntries > 0 && (n_read % 4000 == 0))
127 storeRebuildProgress(sd->index, totalEntries, n_read);
128
129 if (opt_foreground_rebuild)
130 continue; // skip "few entries at a time" check below
131
132 getCurrentTime();
133 const double elapsedMsec = tvSubMsec(loopStart, current_time);
134 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
135 debugs(47, 5, HERE << "pausing after " << n_read << " entries in " <<
136 elapsedMsec << "ms; " << (elapsedMsec/n_read) << "ms per entry");
137 break;
138 }
139 }
140 }
141
142 /// process one cache file
143 void
144 Fs::Ufs::RebuildState::rebuildFromDirectory()
145 {
146 cache_key key[SQUID_MD5_DIGEST_LENGTH];
147
148 struct stat sb;
149 int fd = -1;
150 debugs(47, 3, HERE << "DIR #" << sd->index);
151
152 assert(fd == -1);
153 sfileno filn = 0;
154 int size;
155 fd = getNextFile(&filn, &size);
156
157 if (fd == -2) {
158 debugs(47, DBG_IMPORTANT, "Done scanning " << sd->path << " dir (" <<
159 n_read << " entries)");
160 _done = true;
161 return;
162 } else if (fd < 0) {
163 return;
164 }
165
166 assert(fd > -1);
167 /* lets get file stats here */
168
169 ++n_read;
170
171 if (fstat(fd, &sb) < 0) {
172 debugs(47, DBG_IMPORTANT, HERE << "fstat(FD " << fd << "): " << xstrerror());
173 file_close(fd);
174 --store_open_disk_fd;
175 fd = -1;
176 return;
177 }
178
179 MemBuf buf;
180 buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);
181 if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
182 return;
183
184 const uint64_t expectedSize = sb.st_size > 0 ?
185 static_cast<uint64_t>(sb.st_size) : 0;
186
187 StoreEntry tmpe;
188 const bool parsed = storeRebuildParseEntry(buf, tmpe, key, counts,
189 expectedSize);
190
191 file_close(fd);
192 --store_open_disk_fd;
193 fd = -1;
194
195 bool accepted = parsed && tmpe.swap_file_sz > 0;
196 if (parsed && !accepted) {
197 debugs(47, DBG_IMPORTANT, "WARNING: Ignoring ufs cache entry with " <<
198 "unknown size: " << tmpe);
199 accepted = false;
200 }
201
202 if (!accepted) {
203 // XXX: shouldn't this be a call to commonUfsUnlink?
204 sd->unlinkFile(filn); // should we unlink in all failure cases?
205 return;
206 }
207
208 if (!storeRebuildKeepEntry(tmpe, key, counts))
209 return;
210
211 ++counts.objcount;
212 // tmpe.dump(5);
213 currentEntry(sd->addDiskRestore(key,
214 filn,
215 tmpe.swap_file_sz,
216 tmpe.expires,
217 tmpe.timestamp,
218 tmpe.lastref,
219 tmpe.lastmod,
220 tmpe.refcount, /* refcount */
221 tmpe.flags, /* flags */
222 (int) flags.clean));
223 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
224 }
225
226 StoreEntry *
227 Fs::Ufs::RebuildState::currentEntry() const
228 {
229 return e;
230 }
231
232 void
233 Fs::Ufs::RebuildState::currentEntry(StoreEntry *newValue)
234 {
235 e = newValue;
236 }
237
238 /// process one swap log entry
239 void
240 Fs::Ufs::RebuildState::rebuildFromSwapLog()
241 {
242 StoreSwapLogData swapData;
243
244 if (LogParser->ReadRecord(swapData) != 1) {
245 debugs(47, DBG_IMPORTANT, "Done reading " << sd->path << " swaplog (" << n_read << " entries)");
246 LogParser->Close();
247 delete LogParser;
248 LogParser = NULL;
249 _done = true;
250 return;
251 }
252
253 ++n_read;
254
255 if (!swapData.sane()) {
256 ++counts.invalid;
257 return;
258 }
259
260 /*
261 * BC: during 2.4 development, we changed the way swap file
262 * numbers are assigned and stored. The high 16 bits used
263 * to encode the SD index number. There used to be a call
264 * to storeDirProperFileno here that re-assigned the index
265 * bits. Now, for backwards compatibility, we just need
266 * to mask it off.
267 */
268 swapData.swap_filen &= 0x00FFFFFF;
269
270 debugs(47, 3, HERE << swap_log_op_str[(int) swapData.op] << " " <<
271 storeKeyText(swapData.key) << " "<< std::setfill('0') <<
272 std::hex << std::uppercase << std::setw(8) <<
273 swapData.swap_filen);
274
275 if (swapData.op == SWAP_LOG_ADD) {
276 (void) 0;
277 } else if (swapData.op == SWAP_LOG_DEL) {
278 /* Delete unless we already have a newer copy anywhere in any store */
279 /* this needs to become
280 * 1) unpack url
281 * 2) make synthetic request with headers ?? or otherwise search
282 * for a matching object in the store
283 * TODO FIXME change to new async api
284 */
285 currentEntry (Store::Root().get(swapData.key));
286
287 if (currentEntry() != NULL && swapData.lastref >= e->lastref) {
288 undoAdd();
289 --counts.objcount;
290 ++counts.cancelcount;
291 }
292 return;
293 } else {
294 const double
295 x = ::log(static_cast<double>(++counts.bad_log_op)) / ::log(10.0);
296
297 if (0.0 == x - (double) (int) x)
298 debugs(47, DBG_IMPORTANT, "WARNING: " << counts.bad_log_op << " invalid swap log entries found");
299
300 ++counts.invalid;
301
302 return;
303 }
304
305 ++counts.scancount; // XXX: should not this be incremented earlier?
306
307 if (!sd->validFileno(swapData.swap_filen, 0)) {
308 ++counts.invalid;
309 return;
310 }
311
312 if (EBIT_TEST(swapData.flags, KEY_PRIVATE)) {
313 ++counts.badflags;
314 return;
315 }
316
317 /* this needs to become
318 * 1) unpack url
319 * 2) make synthetic request with headers ?? or otherwise search
320 * for a matching object in the store
321 * TODO FIXME change to new async api
322 */
323 currentEntry (Store::Root().get(swapData.key));
324
325 int used; /* is swapfile already in use? */
326
327 used = sd->mapBitTest(swapData.swap_filen);
328
329 /* If this URL already exists in the cache, does the swap log
330 * appear to have a newer entry? Compare 'lastref' from the
331 * swap log to e->lastref. */
332 /* is the log entry newer than current entry? */
333 int disk_entry_newer = currentEntry() ? (swapData.lastref > currentEntry()->lastref ? 1 : 0) : 0;
334
335 if (used && !disk_entry_newer) {
336 /* log entry is old, ignore it */
337 ++counts.clashcount;
338 return;
339 } else if (used && currentEntry() && currentEntry()->swap_filen == swapData.swap_filen && currentEntry()->swap_dirn == sd->index) {
340 /* swapfile taken, same URL, newer, update meta */
341
342 if (currentEntry()->store_status == STORE_OK) {
343 currentEntry()->lastref = swapData.timestamp;
344 currentEntry()->timestamp = swapData.timestamp;
345 currentEntry()->expires = swapData.expires;
346 currentEntry()->lastmod = swapData.lastmod;
347 currentEntry()->flags = swapData.flags;
348 currentEntry()->refcount += swapData.refcount;
349 sd->dereference(*currentEntry());
350 } else {
351 debug_trap("commonUfsDirRebuildFromSwapLog: bad condition");
352 debugs(47, DBG_IMPORTANT, HERE << "bad condition");
353 }
354 return;
355 } else if (used) {
356 /* swapfile in use, not by this URL, log entry is newer */
357 /* This is sorta bad: the log entry should NOT be newer at this
358 * point. If the log is dirty, the filesize check should have
359 * caught this. If the log is clean, there should never be a
360 * newer entry. */
361 debugs(47, DBG_IMPORTANT, "WARNING: newer swaplog entry for dirno " <<
362 sd->index << ", fileno "<< std::setfill('0') << std::hex <<
363 std::uppercase << std::setw(8) << swapData.swap_filen);
364
365 /* I'm tempted to remove the swapfile here just to be safe,
366 * but there is a bad race condition in the NOVM version if
367 * the swapfile has recently been opened for writing, but
368 * not yet opened for reading. Because we can't map
369 * swapfiles back to StoreEntrys, we don't know the state
370 * of the entry using that file. */
371 /* We'll assume the existing entry is valid, probably because
372 * were in a slow rebuild and the the swap file number got taken
373 * and the validation procedure hasn't run. */
374 assert(flags.need_to_validate);
375 ++counts.clashcount;
376 return;
377 } else if (currentEntry() && !disk_entry_newer) {
378 /* key already exists, current entry is newer */
379 /* keep old, ignore new */
380 ++counts.dupcount;
381 return;
382 } else if (currentEntry()) {
383 /* key already exists, this swapfile not being used */
384 /* junk old, load new */
385 undoAdd();
386 --counts.objcount;
387 ++counts.dupcount;
388 } else {
389 /* URL doesnt exist, swapfile not in use */
390 /* load new */
391 (void) 0;
392 }
393
394 ++counts.objcount;
395
396 currentEntry(sd->addDiskRestore(swapData.key,
397 swapData.swap_filen,
398 swapData.swap_file_sz,
399 swapData.expires,
400 swapData.timestamp,
401 swapData.lastref,
402 swapData.lastmod,
403 swapData.refcount,
404 swapData.flags,
405 (int) flags.clean));
406
407 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
408 }
409
410 /// undo the effects of adding an entry in rebuildFromSwapLog()
411 void
412 Fs::Ufs::RebuildState::undoAdd()
413 {
414 StoreEntry *added = currentEntry();
415 assert(added);
416 currentEntry(NULL);
417
418 // TODO: Why bother with these two if we are going to release?!
419 added->expireNow();
420 added->releaseRequest();
421
422 if (added->swap_filen > -1) {
423 SwapDir *someDir = INDEXSD(added->swap_dirn);
424 assert(someDir);
425 if (UFSSwapDir *ufsDir = dynamic_cast<UFSSwapDir*>(someDir))
426 ufsDir->undoAddDiskRestore(added);
427 // else the entry was loaded from and/or is currently in a non-UFS dir
428 // Thus, there is no use in preserving its disk file (the only purpose
429 // of undoAddDiskRestore!), even if we could. Instead, we release the
430 // the entry and [eventually] unlink its disk file or free its slot.
431 }
432
433 added->release();
434 }
435
436 int
437 Fs::Ufs::RebuildState::getNextFile(sfileno * filn_p, int *)
438 {
439 int fd = -1;
440 int dirs_opened = 0;
441 debugs(47, 3, HERE << "flag=" << flags.init << ", " <<
442 sd->index << ": /"<< std::setfill('0') << std::hex <<
443 std::uppercase << std::setw(2) << curlvl1 << "/" << std::setw(2) <<
444 curlvl2);
445
446 if (done)
447 return -2;
448
449 while (fd < 0 && done == 0) {
450 fd = -1;
451
452 if (!flags.init) { /* initialize, open first file */
453 // XXX: 0's should not be needed, constructor inits now
454 done = 0;
455 curlvl1 = 0;
456 curlvl2 = 0;
457 in_dir = 0;
458 flags.init = true;
459 assert(Config.cacheSwap.n_configured > 0);
460 }
461
462 if (0 == in_dir) { /* we need to read in a new directory */
463 snprintf(fullpath, MAXPATHLEN, "%s/%02X/%02X",
464 sd->path,
465 curlvl1, curlvl2);
466
467 if (dirs_opened)
468 return -1;
469
470 td = opendir(fullpath);
471
472 ++dirs_opened;
473
474 if (td == NULL) {
475 debugs(47, DBG_IMPORTANT, HERE << "error in opendir (" << fullpath << "): " << xstrerror());
476 } else {
477 entry = readdir(td); /* skip . and .. */
478 entry = readdir(td);
479
480 if (entry == NULL && errno == ENOENT)
481 debugs(47, DBG_IMPORTANT, HERE << "WARNING: directory does not exist!");
482 debugs(47, 3, HERE << "Directory " << fullpath);
483 }
484 }
485
486 if (td != NULL && (entry = readdir(td)) != NULL) {
487 ++in_dir;
488
489 if (sscanf(entry->d_name, "%x", &fn) != 1) {
490 debugs(47, 3, HERE << "invalid entry " << entry->d_name);
491 continue;
492 }
493
494 if (!UFSSwapDir::FilenoBelongsHere(fn, sd->index, curlvl1, curlvl2)) {
495 debugs(47, 3, HERE << std::setfill('0') <<
496 std::hex << std::uppercase << std::setw(8) << fn <<
497 " does not belong in " << std::dec << sd->index << "/" <<
498 curlvl1 << "/" << curlvl2);
499
500 continue;
501 }
502
503 if (sd->mapBitTest(fn)) {
504 debugs(47, 3, HERE << "Locked, continuing with next.");
505 continue;
506 }
507
508 snprintf(fullfilename, MAXPATHLEN, "%s/%s",
509 fullpath, entry->d_name);
510 debugs(47, 3, HERE << "Opening " << fullfilename);
511 fd = file_open(fullfilename, O_RDONLY | O_BINARY);
512
513 if (fd < 0)
514 debugs(47, DBG_IMPORTANT, HERE << "error opening " << fullfilename << ": " << xstrerror());
515 else
516 ++store_open_disk_fd;
517
518 continue;
519 }
520
521 if (td != NULL)
522 closedir(td);
523
524 td = NULL;
525
526 in_dir = 0;
527
528 if (sd->validL2(++curlvl2))
529 continue;
530
531 curlvl2 = 0;
532
533 if (sd->validL1(++curlvl1))
534 continue;
535
536 curlvl1 = 0;
537
538 done = 1;
539 }
540
541 *filn_p = fn;
542 return fd;
543 }
544
545 bool
546 Fs::Ufs::RebuildState::error() const
547 {
548 return false;
549 }
550
551 bool
552 Fs::Ufs::RebuildState::isDone() const
553 {
554 return _done;
555 }
556
557 StoreEntry *
558 Fs::Ufs::RebuildState::currentItem()
559 {
560 return currentEntry();
561 }
562