]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/ufs/RebuildState.cc
b817a26aee70958a94ce7f3d7a48695ca6a76ad4
[thirdparty/squid.git] / src / fs / ufs / RebuildState.cc
1 /*
2 * DEBUG: section 47 Store Directory Routines
3 * AUTHOR: Robert Collins
4 *
5 * SQUID Web Proxy Cache http://www.squid-cache.org/
6 * ----------------------------------------------------------
7 *
8 * Squid is the result of efforts by numerous individuals from
9 * the Internet community; see the CONTRIBUTORS file for full
10 * details. Many organizations have provided support for Squid's
11 * development; see the SPONSORS file for full details. Squid is
12 * Copyrighted (C) 2001 by the Regents of the University of
13 * California; see the COPYRIGHT file for full details. Squid
14 * incorporates software developed and/or copyrighted by other
15 * sources; see the CREDITS file for full details.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
30 *
31 */
32
33 #include "squid.h"
34 #include "disk.h"
35 #include "globals.h"
36 #include "RebuildState.h"
37 #include "SquidConfig.h"
38 #include "SquidTime.h"
39 #include "store_key_md5.h"
40 #include "store_rebuild.h"
41 #include "StoreSwapLogData.h"
42 #include "tools.h"
43 #include "UFSSwapLogParser.h"
44
45 #if HAVE_MATH_H
46 #include <math.h>
47 #endif
48 #if HAVE_SYS_STAT_H
49 #include <sys/stat.h>
50 #endif
51 #if HAVE_ERRNO_H
52 #include <errno.h>
53 #endif
54
55 CBDATA_NAMESPACED_CLASS_INIT(Fs::Ufs,RebuildState);
56
57 Fs::Ufs::RebuildState::RebuildState(RefCount<UFSSwapDir> aSwapDir) :
58 sd (aSwapDir), LogParser(NULL), e(NULL), fromLog(true), _done (false)
59 {
60 /*
61 * If the swap.state file exists in the cache_dir, then
62 * we'll use commonUfsDirRebuildFromSwapLog(), otherwise we'll
63 * use commonUfsDirRebuildFromDirectory() to open up each file
64 * and suck in the meta data.
65 */
66 int clean = 0; //TODO: change to bool
67 int zeroLengthLog = 0;
68 FILE *fp = sd->openTmpSwapLog(&clean, &zeroLengthLog);
69
70 if (fp && !zeroLengthLog)
71 LogParser = Fs::Ufs::UFSSwapLogParser::GetUFSSwapLogParser(fp);
72
73 if (LogParser == NULL ) {
74 fromLog = false;
75
76 if (fp != NULL)
77 fclose(fp);
78
79 } else {
80 fromLog = true;
81 flags.clean = (clean != 0);
82 }
83
84 if (!clean)
85 flags.need_to_validate = true;
86
87 debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path << " (" <<
88 (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << ")");
89 }
90
91 Fs::Ufs::RebuildState::~RebuildState()
92 {
93 sd->closeTmpSwapLog();
94
95 if (LogParser)
96 delete LogParser;
97 }
98
99 void
100 Fs::Ufs::RebuildState::RebuildStep(void *data)
101 {
102 RebuildState *rb = (RebuildState *)data;
103 rb->rebuildStep();
104
105 if (!rb->isDone())
106 eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1);
107 else {
108 -- StoreController::store_dirs_rebuilding;
109 storeRebuildComplete(&rb->counts);
110 delete rb;
111 }
112 }
113
114 /// load entries from swap.state or files until we run out of entries or time
115 void
116 Fs::Ufs::RebuildState::rebuildStep()
117 {
118 currentEntry(NULL);
119
120 // Balance our desire to maximize the number of entries processed at once
121 // (and, hence, minimize overheads and total rebuild time) with a
122 // requirement to also process Coordinator events, disk I/Os, etc.
123 const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
124 const timeval loopStart = current_time;
125
126 const int totalEntries = LogParser ? LogParser->SwapLogEntries() : -1;
127
128 while (!isDone()) {
129 if (fromLog)
130 rebuildFromSwapLog();
131 else
132 rebuildFromDirectory();
133
134 // TODO: teach storeRebuildProgress to handle totalEntries <= 0
135 if (totalEntries > 0 && (n_read % 4000 == 0))
136 storeRebuildProgress(sd->index, totalEntries, n_read);
137
138 if (opt_foreground_rebuild)
139 continue; // skip "few entries at a time" check below
140
141 getCurrentTime();
142 const double elapsedMsec = tvSubMsec(loopStart, current_time);
143 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
144 debugs(47, 5, HERE << "pausing after " << n_read << " entries in " <<
145 elapsedMsec << "ms; " << (elapsedMsec/n_read) << "ms per entry");
146 break;
147 }
148 }
149 }
150
151 /// process one cache file
152 void
153 Fs::Ufs::RebuildState::rebuildFromDirectory()
154 {
155 cache_key key[SQUID_MD5_DIGEST_LENGTH];
156
157 struct stat sb;
158 int fd = -1;
159 assert(this != NULL);
160 debugs(47, 3, HERE << "DIR #" << sd->index);
161
162 assert(fd == -1);
163 sfileno filn = 0;
164 int size;
165 fd = getNextFile(&filn, &size);
166
167 if (fd == -2) {
168 debugs(47, DBG_IMPORTANT, "Done scanning " << sd->path << " dir (" <<
169 n_read << " entries)");
170 _done = true;
171 return;
172 } else if (fd < 0) {
173 return;
174 }
175
176 assert(fd > -1);
177 /* lets get file stats here */
178
179 ++n_read;
180
181 if (fstat(fd, &sb) < 0) {
182 debugs(47, DBG_IMPORTANT, HERE << "fstat(FD " << fd << "): " << xstrerror());
183 file_close(fd);
184 --store_open_disk_fd;
185 fd = -1;
186 return;
187 }
188
189 MemBuf buf;
190 buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);
191 if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
192 return;
193
194 const uint64_t expectedSize = sb.st_size > 0 ?
195 static_cast<uint64_t>(sb.st_size) : 0;
196
197 StoreEntry tmpe;
198 const bool parsed = storeRebuildParseEntry(buf, tmpe, key, counts,
199 expectedSize);
200
201 file_close(fd);
202 --store_open_disk_fd;
203 fd = -1;
204
205 bool accepted = parsed && tmpe.swap_file_sz > 0;
206 if (parsed && !accepted) {
207 debugs(47, DBG_IMPORTANT, "WARNING: Ignoring ufs cache entry with " <<
208 "unknown size: " << tmpe);
209 accepted = false;
210 }
211
212 if (!accepted) {
213 // XXX: shouldn't this be a call to commonUfsUnlink?
214 sd->unlinkFile(filn); // should we unlink in all failure cases?
215 return;
216 }
217
218 if (!storeRebuildKeepEntry(tmpe, key, counts))
219 return;
220
221 ++counts.objcount;
222 // tmpe.dump(5);
223 currentEntry(sd->addDiskRestore(key,
224 filn,
225 tmpe.swap_file_sz,
226 tmpe.expires,
227 tmpe.timestamp,
228 tmpe.lastref,
229 tmpe.lastmod,
230 tmpe.refcount, /* refcount */
231 tmpe.flags, /* flags */
232 (int) flags.clean));
233 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
234 }
235
236 StoreEntry *
237 Fs::Ufs::RebuildState::currentEntry() const
238 {
239 return e;
240 }
241
242 void
243 Fs::Ufs::RebuildState::currentEntry(StoreEntry *newValue)
244 {
245 e = newValue;
246 }
247
248 /// process one swap log entry
249 void
250 Fs::Ufs::RebuildState::rebuildFromSwapLog()
251 {
252 StoreSwapLogData swapData;
253
254 if (LogParser->ReadRecord(swapData) != 1) {
255 debugs(47, DBG_IMPORTANT, "Done reading " << sd->path << " swaplog (" << n_read << " entries)");
256 LogParser->Close();
257 delete LogParser;
258 LogParser = NULL;
259 _done = true;
260 return;
261 }
262
263 ++n_read;
264
265 if (!swapData.sane()) {
266 ++counts.invalid;
267 return;
268 }
269
270 /*
271 * BC: during 2.4 development, we changed the way swap file
272 * numbers are assigned and stored. The high 16 bits used
273 * to encode the SD index number. There used to be a call
274 * to storeDirProperFileno here that re-assigned the index
275 * bits. Now, for backwards compatibility, we just need
276 * to mask it off.
277 */
278 swapData.swap_filen &= 0x00FFFFFF;
279
280 debugs(47, 3, HERE << swap_log_op_str[(int) swapData.op] << " " <<
281 storeKeyText(swapData.key) << " "<< std::setfill('0') <<
282 std::hex << std::uppercase << std::setw(8) <<
283 swapData.swap_filen);
284
285 if (swapData.op == SWAP_LOG_ADD) {
286 (void) 0;
287 } else if (swapData.op == SWAP_LOG_DEL) {
288 /* Delete unless we already have a newer copy anywhere in any store */
289 /* this needs to become
290 * 1) unpack url
291 * 2) make synthetic request with headers ?? or otherwise search
292 * for a matching object in the store
293 * TODO FIXME change to new async api
294 */
295 currentEntry (Store::Root().get(swapData.key));
296
297 if (currentEntry() != NULL && swapData.lastref >= e->lastref) {
298 undoAdd();
299 --counts.objcount;
300 ++counts.cancelcount;
301 }
302 return;
303 } else {
304 const double
305 x = ::log(static_cast<double>(++counts.bad_log_op)) / ::log(10.0);
306
307 if (0.0 == x - (double) (int) x)
308 debugs(47, DBG_IMPORTANT, "WARNING: " << counts.bad_log_op << " invalid swap log entries found");
309
310 ++counts.invalid;
311
312 return;
313 }
314
315 ++counts.scancount; // XXX: should not this be incremented earlier?
316
317 if (!sd->validFileno(swapData.swap_filen, 0)) {
318 ++counts.invalid;
319 return;
320 }
321
322 if (EBIT_TEST(swapData.flags, KEY_PRIVATE)) {
323 ++counts.badflags;
324 return;
325 }
326
327 /* this needs to become
328 * 1) unpack url
329 * 2) make synthetic request with headers ?? or otherwise search
330 * for a matching object in the store
331 * TODO FIXME change to new async api
332 */
333 currentEntry (Store::Root().get(swapData.key));
334
335 int used; /* is swapfile already in use? */
336
337 used = sd->mapBitTest(swapData.swap_filen);
338
339 /* If this URL already exists in the cache, does the swap log
340 * appear to have a newer entry? Compare 'lastref' from the
341 * swap log to e->lastref. */
342 /* is the log entry newer than current entry? */
343 int disk_entry_newer = currentEntry() ? (swapData.lastref > currentEntry()->lastref ? 1 : 0) : 0;
344
345 if (used && !disk_entry_newer) {
346 /* log entry is old, ignore it */
347 ++counts.clashcount;
348 return;
349 } else if (used && currentEntry() && currentEntry()->swap_filen == swapData.swap_filen && currentEntry()->swap_dirn == sd->index) {
350 /* swapfile taken, same URL, newer, update meta */
351
352 if (currentEntry()->store_status == STORE_OK) {
353 currentEntry()->lastref = swapData.timestamp;
354 currentEntry()->timestamp = swapData.timestamp;
355 currentEntry()->expires = swapData.expires;
356 currentEntry()->lastmod = swapData.lastmod;
357 currentEntry()->flags = swapData.flags;
358 currentEntry()->refcount += swapData.refcount;
359 sd->dereference(*currentEntry(), false);
360 } else {
361 debug_trap("commonUfsDirRebuildFromSwapLog: bad condition");
362 debugs(47, DBG_IMPORTANT, HERE << "bad condition");
363 }
364 return;
365 } else if (used) {
366 /* swapfile in use, not by this URL, log entry is newer */
367 /* This is sorta bad: the log entry should NOT be newer at this
368 * point. If the log is dirty, the filesize check should have
369 * caught this. If the log is clean, there should never be a
370 * newer entry. */
371 debugs(47, DBG_IMPORTANT, "WARNING: newer swaplog entry for dirno " <<
372 sd->index << ", fileno "<< std::setfill('0') << std::hex <<
373 std::uppercase << std::setw(8) << swapData.swap_filen);
374
375 /* I'm tempted to remove the swapfile here just to be safe,
376 * but there is a bad race condition in the NOVM version if
377 * the swapfile has recently been opened for writing, but
378 * not yet opened for reading. Because we can't map
379 * swapfiles back to StoreEntrys, we don't know the state
380 * of the entry using that file. */
381 /* We'll assume the existing entry is valid, probably because
382 * were in a slow rebuild and the the swap file number got taken
383 * and the validation procedure hasn't run. */
384 assert(flags.need_to_validate);
385 ++counts.clashcount;
386 return;
387 } else if (currentEntry() && !disk_entry_newer) {
388 /* key already exists, current entry is newer */
389 /* keep old, ignore new */
390 ++counts.dupcount;
391 return;
392 } else if (currentEntry()) {
393 /* key already exists, this swapfile not being used */
394 /* junk old, load new */
395 undoAdd();
396 --counts.objcount;
397 ++counts.dupcount;
398 } else {
399 /* URL doesnt exist, swapfile not in use */
400 /* load new */
401 (void) 0;
402 }
403
404 ++counts.objcount;
405
406 currentEntry(sd->addDiskRestore(swapData.key,
407 swapData.swap_filen,
408 swapData.swap_file_sz,
409 swapData.expires,
410 swapData.timestamp,
411 swapData.lastref,
412 swapData.lastmod,
413 swapData.refcount,
414 swapData.flags,
415 (int) flags.clean));
416
417 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
418 }
419
420 /// undo the effects of adding an entry in rebuildFromSwapLog()
421 void
422 Fs::Ufs::RebuildState::undoAdd()
423 {
424 StoreEntry *added = currentEntry();
425 assert(added);
426 currentEntry(NULL);
427
428 // TODO: Why bother with these two if we are going to release?!
429 added->expireNow();
430 added->releaseRequest();
431
432 if (added->swap_filen > -1) {
433 SwapDir *someDir = INDEXSD(added->swap_dirn);
434 assert(someDir);
435 if (UFSSwapDir *ufsDir = dynamic_cast<UFSSwapDir*>(someDir))
436 ufsDir->undoAddDiskRestore(added);
437 // else the entry was loaded from and/or is currently in a non-UFS dir
438 // Thus, there is no use in preserving its disk file (the only purpose
439 // of undoAddDiskRestore!), even if we could. Instead, we release the
440 // the entry and [eventually] unlink its disk file or free its slot.
441 }
442
443 added->release();
444 }
445
446 int
447 Fs::Ufs::RebuildState::getNextFile(sfileno * filn_p, int *size)
448 {
449 int fd = -1;
450 int dirs_opened = 0;
451 debugs(47, 3, HERE << "flag=" << flags.init << ", " <<
452 sd->index << ": /"<< std::setfill('0') << std::hex <<
453 std::uppercase << std::setw(2) << curlvl1 << "/" << std::setw(2) <<
454 curlvl2);
455
456 if (done)
457 return -2;
458
459 while (fd < 0 && done == 0) {
460 fd = -1;
461
462 if (!flags.init) { /* initialize, open first file */
463 done = 0;
464 curlvl1 = 0;
465 curlvl2 = 0;
466 in_dir = 0;
467 flags.init = true;
468 assert(Config.cacheSwap.n_configured > 0);
469 }
470
471 if (0 == in_dir) { /* we need to read in a new directory */
472 snprintf(fullpath, MAXPATHLEN, "%s/%02X/%02X",
473 sd->path,
474 curlvl1, curlvl2);
475
476 if (dirs_opened)
477 return -1;
478
479 td = opendir(fullpath);
480
481 ++dirs_opened;
482
483 if (td == NULL) {
484 debugs(47, DBG_IMPORTANT, HERE << "error in opendir (" << fullpath << "): " << xstrerror());
485 } else {
486 entry = readdir(td); /* skip . and .. */
487 entry = readdir(td);
488
489 if (entry == NULL && errno == ENOENT)
490 debugs(47, DBG_IMPORTANT, HERE << "WARNING: directory does not exist!");
491 debugs(47, 3, HERE << "Directory " << fullpath);
492 }
493 }
494
495 if (td != NULL && (entry = readdir(td)) != NULL) {
496 ++in_dir;
497
498 if (sscanf(entry->d_name, "%x", &fn) != 1) {
499 debugs(47, 3, HERE << "invalid entry " << entry->d_name);
500 continue;
501 }
502
503 if (!UFSSwapDir::FilenoBelongsHere(fn, sd->index, curlvl1, curlvl2)) {
504 debugs(47, 3, HERE << std::setfill('0') <<
505 std::hex << std::uppercase << std::setw(8) << fn <<
506 " does not belong in " << std::dec << sd->index << "/" <<
507 curlvl1 << "/" << curlvl2);
508
509 continue;
510 }
511
512 if (sd->mapBitTest(fn)) {
513 debugs(47, 3, HERE << "Locked, continuing with next.");
514 continue;
515 }
516
517 snprintf(fullfilename, MAXPATHLEN, "%s/%s",
518 fullpath, entry->d_name);
519 debugs(47, 3, HERE << "Opening " << fullfilename);
520 fd = file_open(fullfilename, O_RDONLY | O_BINARY);
521
522 if (fd < 0)
523 debugs(47, DBG_IMPORTANT, HERE << "error opening " << fullfilename << ": " << xstrerror());
524 else
525 ++store_open_disk_fd;
526
527 continue;
528 }
529
530 if (td != NULL)
531 closedir(td);
532
533 td = NULL;
534
535 in_dir = 0;
536
537 if (sd->validL2(++curlvl2))
538 continue;
539
540 curlvl2 = 0;
541
542 if (sd->validL1(++curlvl1))
543 continue;
544
545 curlvl1 = 0;
546
547 done = 1;
548 }
549
550 *filn_p = fn;
551 return fd;
552 }
553
554 bool
555 Fs::Ufs::RebuildState::error() const
556 {
557 return false;
558 }
559
560 bool
561 Fs::Ufs::RebuildState::isDone() const
562 {
563 return _done;
564 }
565
566 StoreEntry *
567 Fs::Ufs::RebuildState::currentItem()
568 {
569 return currentEntry();
570 }