]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/ufs/RebuildState.cc
Merged from trunk rev.14625
[thirdparty/squid.git] / src / fs / ufs / RebuildState.cc
1 /*
2 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 47 Store Directory Routines */
10
11 #include "squid.h"
12 #include "fs_io.h"
13 #include "globals.h"
14 #include "RebuildState.h"
15 #include "SquidConfig.h"
16 #include "SquidTime.h"
17 #include "store/Disks.h"
18 #include "store_key_md5.h"
19 #include "store_rebuild.h"
20 #include "StoreSwapLogData.h"
21 #include "tools.h"
22 #include "UFSSwapLogParser.h"
23
24 #include <cerrno>
25 #include <cmath>
26 #if HAVE_SYS_STAT_H
27 #include <sys/stat.h>
28 #endif
29
30 CBDATA_NAMESPACED_CLASS_INIT(Fs::Ufs,RebuildState);
31
32 Fs::Ufs::RebuildState::RebuildState(RefCount<UFSSwapDir> aSwapDir) :
33 sd(aSwapDir),
34 n_read(0),
35 LogParser(NULL),
36 curlvl1(0),
37 curlvl2(0),
38 in_dir(0),
39 done(0),
40 fn(0),
41 entry(NULL),
42 td(NULL),
43 e(NULL),
44 fromLog(true),
45 _done(false),
46 cbdata(NULL)
47 {
48 *fullpath = 0;
49 *fullfilename = 0;
50
51 /*
52 * If the swap.state file exists in the cache_dir, then
53 * we'll use commonUfsDirRebuildFromSwapLog(), otherwise we'll
54 * use commonUfsDirRebuildFromDirectory() to open up each file
55 * and suck in the meta data.
56 */
57 int clean = 0; //TODO: change to bool
58 int zeroLengthLog = 0;
59 FILE *fp = sd->openTmpSwapLog(&clean, &zeroLengthLog);
60
61 if (fp && !zeroLengthLog)
62 LogParser = Fs::Ufs::UFSSwapLogParser::GetUFSSwapLogParser(fp);
63
64 if (LogParser == NULL ) {
65 fromLog = false;
66
67 if (fp != NULL)
68 fclose(fp);
69
70 } else {
71 fromLog = true;
72 flags.clean = (clean != 0);
73 }
74
75 if (!clean)
76 flags.need_to_validate = true;
77
78 debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path << " (" <<
79 (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << ")");
80 }
81
82 Fs::Ufs::RebuildState::~RebuildState()
83 {
84 sd->closeTmpSwapLog();
85
86 if (LogParser)
87 delete LogParser;
88 }
89
90 void
91 Fs::Ufs::RebuildState::RebuildStep(void *data)
92 {
93 RebuildState *rb = (RebuildState *)data;
94 rb->rebuildStep();
95
96 if (!rb->isDone())
97 eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1);
98 else {
99 -- StoreController::store_dirs_rebuilding;
100 storeRebuildComplete(&rb->counts);
101 delete rb;
102 }
103 }
104
105 /// load entries from swap.state or files until we run out of entries or time
106 void
107 Fs::Ufs::RebuildState::rebuildStep()
108 {
109 currentEntry(NULL);
110
111 // Balance our desire to maximize the number of entries processed at once
112 // (and, hence, minimize overheads and total rebuild time) with a
113 // requirement to also process Coordinator events, disk I/Os, etc.
114 const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
115 const timeval loopStart = current_time;
116
117 const int totalEntries = LogParser ? LogParser->SwapLogEntries() : -1;
118
119 while (!isDone()) {
120 if (fromLog)
121 rebuildFromSwapLog();
122 else
123 rebuildFromDirectory();
124
125 // TODO: teach storeRebuildProgress to handle totalEntries <= 0
126 if (totalEntries > 0 && (n_read % 4000 == 0))
127 storeRebuildProgress(sd->index, totalEntries, n_read);
128
129 if (opt_foreground_rebuild)
130 continue; // skip "few entries at a time" check below
131
132 getCurrentTime();
133 const double elapsedMsec = tvSubMsec(loopStart, current_time);
134 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
135 debugs(47, 5, HERE << "pausing after " << n_read << " entries in " <<
136 elapsedMsec << "ms; " << (elapsedMsec/n_read) << "ms per entry");
137 break;
138 }
139 }
140 }
141
142 /// process one cache file
143 void
144 Fs::Ufs::RebuildState::rebuildFromDirectory()
145 {
146 cache_key key[SQUID_MD5_DIGEST_LENGTH];
147
148 struct stat sb;
149 int fd = -1;
150 debugs(47, 3, HERE << "DIR #" << sd->index);
151
152 assert(fd == -1);
153 sfileno filn = 0;
154 int size;
155 fd = getNextFile(&filn, &size);
156
157 if (fd == -2) {
158 debugs(47, DBG_IMPORTANT, "Done scanning " << sd->path << " dir (" <<
159 n_read << " entries)");
160 _done = true;
161 return;
162 } else if (fd < 0) {
163 return;
164 }
165
166 assert(fd > -1);
167 /* lets get file stats here */
168
169 ++n_read;
170
171 if (fstat(fd, &sb) < 0) {
172 int xerrno = errno;
173 debugs(47, DBG_IMPORTANT, MYNAME << "fstat(FD " << fd << "): " << xstrerr(xerrno));
174 file_close(fd);
175 --store_open_disk_fd;
176 fd = -1;
177 return;
178 }
179
180 MemBuf buf;
181 buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);
182 if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
183 return;
184
185 const uint64_t expectedSize = sb.st_size > 0 ?
186 static_cast<uint64_t>(sb.st_size) : 0;
187
188 StoreEntry tmpe;
189 const bool parsed = storeRebuildParseEntry(buf, tmpe, key, counts,
190 expectedSize);
191
192 file_close(fd);
193 --store_open_disk_fd;
194 fd = -1;
195
196 bool accepted = parsed && tmpe.swap_file_sz > 0;
197 if (parsed && !accepted) {
198 debugs(47, DBG_IMPORTANT, "WARNING: Ignoring ufs cache entry with " <<
199 "unknown size: " << tmpe);
200 accepted = false;
201 }
202
203 if (!accepted) {
204 // XXX: shouldn't this be a call to commonUfsUnlink?
205 sd->unlinkFile(filn); // should we unlink in all failure cases?
206 return;
207 }
208
209 if (!storeRebuildKeepEntry(tmpe, key, counts))
210 return;
211
212 ++counts.objcount;
213 // tmpe.dump(5);
214 currentEntry(sd->addDiskRestore(key,
215 filn,
216 tmpe.swap_file_sz,
217 tmpe.expires,
218 tmpe.timestamp,
219 tmpe.lastref,
220 tmpe.lastmod,
221 tmpe.refcount, /* refcount */
222 tmpe.flags, /* flags */
223 (int) flags.clean));
224 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
225 }
226
227 StoreEntry *
228 Fs::Ufs::RebuildState::currentEntry() const
229 {
230 return e;
231 }
232
233 void
234 Fs::Ufs::RebuildState::currentEntry(StoreEntry *newValue)
235 {
236 e = newValue;
237 }
238
239 /// process one swap log entry
240 void
241 Fs::Ufs::RebuildState::rebuildFromSwapLog()
242 {
243 StoreSwapLogData swapData;
244
245 if (LogParser->ReadRecord(swapData) != 1) {
246 debugs(47, DBG_IMPORTANT, "Done reading " << sd->path << " swaplog (" << n_read << " entries)");
247 LogParser->Close();
248 delete LogParser;
249 LogParser = NULL;
250 _done = true;
251 return;
252 }
253
254 ++n_read;
255
256 if (!swapData.sane()) {
257 ++counts.invalid;
258 return;
259 }
260
261 /*
262 * BC: during 2.4 development, we changed the way swap file
263 * numbers are assigned and stored. The high 16 bits used
264 * to encode the SD index number. There used to be a call
265 * to storeDirProperFileno here that re-assigned the index
266 * bits. Now, for backwards compatibility, we just need
267 * to mask it off.
268 */
269 swapData.swap_filen &= 0x00FFFFFF;
270
271 debugs(47, 3, HERE << swap_log_op_str[(int) swapData.op] << " " <<
272 storeKeyText(swapData.key) << " "<< std::setfill('0') <<
273 std::hex << std::uppercase << std::setw(8) <<
274 swapData.swap_filen);
275
276 if (swapData.op == SWAP_LOG_ADD) {
277 (void) 0;
278 } else if (swapData.op == SWAP_LOG_DEL) {
279 /* Delete unless we already have a newer copy anywhere in any store */
280 /* this needs to become
281 * 1) unpack url
282 * 2) make synthetic request with headers ?? or otherwise search
283 * for a matching object in the store
284 * TODO FIXME change to new async api
285 */
286 currentEntry (Store::Root().get(swapData.key));
287
288 if (currentEntry() != NULL && swapData.lastref >= e->lastref) {
289 undoAdd();
290 --counts.objcount;
291 ++counts.cancelcount;
292 }
293 return;
294 } else {
295 const double
296 x = ::log(static_cast<double>(++counts.bad_log_op)) / ::log(10.0);
297
298 if (0.0 == x - (double) (int) x)
299 debugs(47, DBG_IMPORTANT, "WARNING: " << counts.bad_log_op << " invalid swap log entries found");
300
301 ++counts.invalid;
302
303 return;
304 }
305
306 ++counts.scancount; // XXX: should not this be incremented earlier?
307
308 if (!sd->validFileno(swapData.swap_filen, 0)) {
309 ++counts.invalid;
310 return;
311 }
312
313 if (EBIT_TEST(swapData.flags, KEY_PRIVATE)) {
314 ++counts.badflags;
315 return;
316 }
317
318 /* this needs to become
319 * 1) unpack url
320 * 2) make synthetic request with headers ?? or otherwise search
321 * for a matching object in the store
322 * TODO FIXME change to new async api
323 */
324 currentEntry (Store::Root().get(swapData.key));
325
326 int used; /* is swapfile already in use? */
327
328 used = sd->mapBitTest(swapData.swap_filen);
329
330 /* If this URL already exists in the cache, does the swap log
331 * appear to have a newer entry? Compare 'lastref' from the
332 * swap log to e->lastref. */
333 /* is the log entry newer than current entry? */
334 int disk_entry_newer = currentEntry() ? (swapData.lastref > currentEntry()->lastref ? 1 : 0) : 0;
335
336 if (used && !disk_entry_newer) {
337 /* log entry is old, ignore it */
338 ++counts.clashcount;
339 return;
340 } else if (used && currentEntry() && currentEntry()->swap_filen == swapData.swap_filen && currentEntry()->swap_dirn == sd->index) {
341 /* swapfile taken, same URL, newer, update meta */
342
343 if (currentEntry()->store_status == STORE_OK) {
344 currentEntry()->lastref = swapData.timestamp;
345 currentEntry()->timestamp = swapData.timestamp;
346 currentEntry()->expires = swapData.expires;
347 currentEntry()->lastmod = swapData.lastmod;
348 currentEntry()->flags = swapData.flags;
349 currentEntry()->refcount += swapData.refcount;
350 sd->dereference(*currentEntry());
351 } else {
352 debug_trap("commonUfsDirRebuildFromSwapLog: bad condition");
353 debugs(47, DBG_IMPORTANT, HERE << "bad condition");
354 }
355 return;
356 } else if (used) {
357 /* swapfile in use, not by this URL, log entry is newer */
358 /* This is sorta bad: the log entry should NOT be newer at this
359 * point. If the log is dirty, the filesize check should have
360 * caught this. If the log is clean, there should never be a
361 * newer entry. */
362 debugs(47, DBG_IMPORTANT, "WARNING: newer swaplog entry for dirno " <<
363 sd->index << ", fileno "<< std::setfill('0') << std::hex <<
364 std::uppercase << std::setw(8) << swapData.swap_filen);
365
366 /* I'm tempted to remove the swapfile here just to be safe,
367 * but there is a bad race condition in the NOVM version if
368 * the swapfile has recently been opened for writing, but
369 * not yet opened for reading. Because we can't map
370 * swapfiles back to StoreEntrys, we don't know the state
371 * of the entry using that file. */
372 /* We'll assume the existing entry is valid, probably because
373 * were in a slow rebuild and the the swap file number got taken
374 * and the validation procedure hasn't run. */
375 assert(flags.need_to_validate);
376 ++counts.clashcount;
377 return;
378 } else if (currentEntry() && !disk_entry_newer) {
379 /* key already exists, current entry is newer */
380 /* keep old, ignore new */
381 ++counts.dupcount;
382 return;
383 } else if (currentEntry()) {
384 /* key already exists, this swapfile not being used */
385 /* junk old, load new */
386 undoAdd();
387 --counts.objcount;
388 ++counts.dupcount;
389 } else {
390 /* URL doesnt exist, swapfile not in use */
391 /* load new */
392 (void) 0;
393 }
394
395 ++counts.objcount;
396
397 currentEntry(sd->addDiskRestore(swapData.key,
398 swapData.swap_filen,
399 swapData.swap_file_sz,
400 swapData.expires,
401 swapData.timestamp,
402 swapData.lastref,
403 swapData.lastmod,
404 swapData.refcount,
405 swapData.flags,
406 (int) flags.clean));
407
408 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
409 }
410
411 /// undo the effects of adding an entry in rebuildFromSwapLog()
412 void
413 Fs::Ufs::RebuildState::undoAdd()
414 {
415 StoreEntry *added = currentEntry();
416 assert(added);
417 currentEntry(NULL);
418
419 // TODO: Why bother with these two if we are going to release?!
420 added->expireNow();
421 added->releaseRequest();
422
423 if (added->swap_filen > -1) {
424 SwapDir *someDir = INDEXSD(added->swap_dirn);
425 assert(someDir);
426 if (UFSSwapDir *ufsDir = dynamic_cast<UFSSwapDir*>(someDir))
427 ufsDir->undoAddDiskRestore(added);
428 // else the entry was loaded from and/or is currently in a non-UFS dir
429 // Thus, there is no use in preserving its disk file (the only purpose
430 // of undoAddDiskRestore!), even if we could. Instead, we release the
431 // the entry and [eventually] unlink its disk file or free its slot.
432 }
433
434 added->release();
435 }
436
437 int
438 Fs::Ufs::RebuildState::getNextFile(sfileno * filn_p, int *)
439 {
440 int fd = -1;
441 int dirs_opened = 0;
442 debugs(47, 3, HERE << "flag=" << flags.init << ", " <<
443 sd->index << ": /"<< std::setfill('0') << std::hex <<
444 std::uppercase << std::setw(2) << curlvl1 << "/" << std::setw(2) <<
445 curlvl2);
446
447 if (done)
448 return -2;
449
450 while (fd < 0 && done == 0) {
451 fd = -1;
452
453 if (!flags.init) { /* initialize, open first file */
454 // XXX: 0's should not be needed, constructor inits now
455 done = 0;
456 curlvl1 = 0;
457 curlvl2 = 0;
458 in_dir = 0;
459 flags.init = true;
460 assert(Config.cacheSwap.n_configured > 0);
461 }
462
463 if (0 == in_dir) { /* we need to read in a new directory */
464 snprintf(fullpath, MAXPATHLEN, "%s/%02X/%02X",
465 sd->path,
466 curlvl1, curlvl2);
467
468 if (dirs_opened)
469 return -1;
470
471 td = opendir(fullpath);
472
473 ++dirs_opened;
474
475 if (!td) {
476 int xerrno = errno;
477 debugs(47, DBG_IMPORTANT, MYNAME << "error in opendir (" << fullpath << "): " << xstrerr(xerrno));
478 } else {
479 entry = readdir(td); /* skip . and .. */
480 entry = readdir(td);
481
482 if (entry == NULL && errno == ENOENT)
483 debugs(47, DBG_IMPORTANT, HERE << "WARNING: directory does not exist!");
484 debugs(47, 3, HERE << "Directory " << fullpath);
485 }
486 }
487
488 if (td != NULL && (entry = readdir(td)) != NULL) {
489 ++in_dir;
490
491 if (sscanf(entry->d_name, "%x", &fn) != 1) {
492 debugs(47, 3, HERE << "invalid entry " << entry->d_name);
493 continue;
494 }
495
496 if (!UFSSwapDir::FilenoBelongsHere(fn, sd->index, curlvl1, curlvl2)) {
497 debugs(47, 3, HERE << std::setfill('0') <<
498 std::hex << std::uppercase << std::setw(8) << fn <<
499 " does not belong in " << std::dec << sd->index << "/" <<
500 curlvl1 << "/" << curlvl2);
501
502 continue;
503 }
504
505 if (sd->mapBitTest(fn)) {
506 debugs(47, 3, HERE << "Locked, continuing with next.");
507 continue;
508 }
509
510 snprintf(fullfilename, MAXPATHLEN, "%s/%s",
511 fullpath, entry->d_name);
512 debugs(47, 3, HERE << "Opening " << fullfilename);
513 fd = file_open(fullfilename, O_RDONLY | O_BINARY);
514
515 if (fd < 0) {
516 int xerrno = errno;
517 debugs(47, DBG_IMPORTANT, MYNAME << "error opening " << fullfilename << ": " << xstrerr(xerrno));
518 } else
519 ++store_open_disk_fd;
520
521 continue;
522 }
523
524 if (td != NULL)
525 closedir(td);
526
527 td = NULL;
528
529 in_dir = 0;
530
531 if (sd->validL2(++curlvl2))
532 continue;
533
534 curlvl2 = 0;
535
536 if (sd->validL1(++curlvl1))
537 continue;
538
539 curlvl1 = 0;
540
541 done = 1;
542 }
543
544 *filn_p = fn;
545 return fd;
546 }
547
548 bool
549 Fs::Ufs::RebuildState::error() const
550 {
551 return false;
552 }
553
554 bool
555 Fs::Ufs::RebuildState::isDone() const
556 {
557 return _done;
558 }
559
560 StoreEntry *
561 Fs::Ufs::RebuildState::currentItem()
562 {
563 return currentEntry();
564 }
565