]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/ufs/RebuildState.cc
sourceformat: split protos.h into more specific headers, change many functions' likag...
[thirdparty/squid.git] / src / fs / ufs / RebuildState.cc
1 /*
2 * DEBUG: section 47 Store Directory Routines
3 * AUTHOR: Robert Collins
4 *
5 * SQUID Web Proxy Cache http://www.squid-cache.org/
6 * ----------------------------------------------------------
7 *
8 * Squid is the result of efforts by numerous individuals from
9 * the Internet community; see the CONTRIBUTORS file for full
10 * details. Many organizations have provided support for Squid's
11 * development; see the SPONSORS file for full details. Squid is
12 * Copyrighted (C) 2001 by the Regents of the University of
13 * California; see the COPYRIGHT file for full details. Squid
14 * incorporates software developed and/or copyrighted by other
15 * sources; see the CREDITS file for full details.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
30 *
31 */
32
33 #include "squid.h"
34 #include "disk.h"
35 #include "globals.h"
36 #include "RebuildState.h"
37 #include "SquidConfig.h"
38 #include "SquidTime.h"
39 #include "store_key_md5.h"
40 #include "store_rebuild.h"
41 #include "StoreSwapLogData.h"
42 #include "tools.h"
43 #include "UFSSwapLogParser.h"
44
45 #if HAVE_MATH_H
46 #include <math.h>
47 #endif
48 #if HAVE_SYS_STAT_H
49 #include <sys/stat.h>
50 #endif
51 #if HAVE_ERRNO_H
52 #include <errno.h>
53 #endif
54
55 CBDATA_NAMESPACED_CLASS_INIT(Fs::Ufs,RebuildState);
56
57 Fs::Ufs::RebuildState::RebuildState(RefCount<UFSSwapDir> aSwapDir) :
58 sd (aSwapDir), LogParser(NULL), e(NULL), fromLog(true), _done (false)
59 {
60 /*
61 * If the swap.state file exists in the cache_dir, then
62 * we'll use commonUfsDirRebuildFromSwapLog(), otherwise we'll
63 * use commonUfsDirRebuildFromDirectory() to open up each file
64 * and suck in the meta data.
65 */
66 int clean = 0;
67 int zeroLengthLog = 0;
68 FILE *fp = sd->openTmpSwapLog(&clean, &zeroLengthLog);
69
70 if (fp && !zeroLengthLog)
71 LogParser = Fs::Ufs::UFSSwapLogParser::GetUFSSwapLogParser(fp);
72
73 if (LogParser == NULL ) {
74 fromLog = false;
75
76 if (fp != NULL)
77 fclose(fp);
78
79 } else {
80 fromLog = true;
81 flags.clean = (unsigned int) clean;
82 }
83
84 if (!clean)
85 flags.need_to_validate = 1;
86
87 debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path << " (" <<
88 (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << ")");
89 }
90
91 Fs::Ufs::RebuildState::~RebuildState()
92 {
93 sd->closeTmpSwapLog();
94
95 if (LogParser)
96 delete LogParser;
97 }
98
99 void
100 Fs::Ufs::RebuildState::RebuildStep(void *data)
101 {
102 RebuildState *rb = (RebuildState *)data;
103 rb->rebuildStep();
104
105 if (!rb->isDone())
106 eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1);
107 else {
108 -- StoreController::store_dirs_rebuilding;
109 storeRebuildComplete(&rb->counts);
110 delete rb;
111 }
112 }
113
114 /// load entries from swap.state or files until we run out of entries or time
115 void
116 Fs::Ufs::RebuildState::rebuildStep()
117 {
118 currentEntry(NULL);
119
120 // Balance our desire to maximize the number of entries processed at once
121 // (and, hence, minimize overheads and total rebuild time) with a
122 // requirement to also process Coordinator events, disk I/Os, etc.
123 const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
124 const timeval loopStart = current_time;
125
126 const int totalEntries = LogParser ? LogParser->SwapLogEntries() : -1;
127
128 while (!isDone()) {
129 if (fromLog)
130 rebuildFromSwapLog();
131 else
132 rebuildFromDirectory();
133
134 // TODO: teach storeRebuildProgress to handle totalEntries <= 0
135 if (totalEntries > 0 && (n_read % 4000 == 0))
136 storeRebuildProgress(sd->index, totalEntries, n_read);
137
138 if (opt_foreground_rebuild)
139 continue; // skip "few entries at a time" check below
140
141 getCurrentTime();
142 const double elapsedMsec = tvSubMsec(loopStart, current_time);
143 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
144 debugs(47, 5, HERE << "pausing after " << n_read << " entries in " <<
145 elapsedMsec << "ms; " << (elapsedMsec/n_read) << "ms per entry");
146 break;
147 }
148 }
149 }
150
151 /// process one cache file
152 void
153 Fs::Ufs::RebuildState::rebuildFromDirectory()
154 {
155 cache_key key[SQUID_MD5_DIGEST_LENGTH];
156
157 struct stat sb;
158 int fd = -1;
159 assert(this != NULL);
160 debugs(47, 3, HERE << "DIR #" << sd->index);
161
162 assert(fd == -1);
163 sfileno filn = 0;
164 int size;
165 fd = getNextFile(&filn, &size);
166
167 if (fd == -2) {
168 debugs(47, DBG_IMPORTANT, "Done scanning " << sd->path << " dir (" <<
169 n_read << " entries)");
170 _done = true;
171 return;
172 } else if (fd < 0) {
173 return;
174 }
175
176 assert(fd > -1);
177 /* lets get file stats here */
178
179 ++n_read;
180
181 if (fstat(fd, &sb) < 0) {
182 debugs(47, DBG_IMPORTANT, HERE << "fstat(FD " << fd << "): " << xstrerror());
183 file_close(fd);
184 --store_open_disk_fd;
185 fd = -1;
186 return;
187 }
188
189 MemBuf buf;
190 buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);
191 if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
192 return;
193
194 StoreEntry tmpe;
195 const bool loaded = storeRebuildParseEntry(buf, tmpe, key, counts,
196 (int64_t)sb.st_size);
197
198 file_close(fd);
199 --store_open_disk_fd;
200 fd = -1;
201
202 if (!loaded) {
203 // XXX: shouldn't this be a call to commonUfsUnlink?
204 sd->unlinkFile(filn); // should we unlink in all failure cases?
205 return;
206 }
207
208 if (!storeRebuildKeepEntry(tmpe, key, counts))
209 return;
210
211 ++counts.objcount;
212 // tmpe.dump(5);
213 currentEntry(sd->addDiskRestore(key,
214 filn,
215 tmpe.swap_file_sz,
216 tmpe.expires,
217 tmpe.timestamp,
218 tmpe.lastref,
219 tmpe.lastmod,
220 tmpe.refcount, /* refcount */
221 tmpe.flags, /* flags */
222 (int) flags.clean));
223 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
224 }
225
226 StoreEntry *
227 Fs::Ufs::RebuildState::currentEntry() const
228 {
229 return e;
230 }
231
232 void
233 Fs::Ufs::RebuildState::currentEntry(StoreEntry *newValue)
234 {
235 e = newValue;
236 }
237
238 /// process one swap log entry
239 void
240 Fs::Ufs::RebuildState::rebuildFromSwapLog()
241 {
242 StoreSwapLogData swapData;
243
244 if (LogParser->ReadRecord(swapData) != 1) {
245 debugs(47, DBG_IMPORTANT, "Done reading " << sd->path << " swaplog (" << n_read << " entries)");
246 LogParser->Close();
247 delete LogParser;
248 LogParser = NULL;
249 _done = true;
250 return;
251 }
252
253 ++n_read;
254
255 if (!swapData.sane()) {
256 ++counts.invalid;
257 return;
258 }
259
260 /*
261 * BC: during 2.4 development, we changed the way swap file
262 * numbers are assigned and stored. The high 16 bits used
263 * to encode the SD index number. There used to be a call
264 * to storeDirProperFileno here that re-assigned the index
265 * bits. Now, for backwards compatibility, we just need
266 * to mask it off.
267 */
268 swapData.swap_filen &= 0x00FFFFFF;
269
270 debugs(47, 3, HERE << swap_log_op_str[(int) swapData.op] << " " <<
271 storeKeyText(swapData.key) << " "<< std::setfill('0') <<
272 std::hex << std::uppercase << std::setw(8) <<
273 swapData.swap_filen);
274
275 if (swapData.op == SWAP_LOG_ADD) {
276 (void) 0;
277 } else if (swapData.op == SWAP_LOG_DEL) {
278 /* Delete unless we already have a newer copy anywhere in any store */
279 /* this needs to become
280 * 1) unpack url
281 * 2) make synthetic request with headers ?? or otherwise search
282 * for a matching object in the store
283 * TODO FIXME change to new async api
284 */
285 currentEntry (Store::Root().get(swapData.key));
286
287 if (currentEntry() != NULL && swapData.lastref >= e->lastref) {
288 undoAdd();
289 --counts.objcount;
290 ++counts.cancelcount;
291 }
292 return;
293 } else {
294 const double
295 x = ::log(static_cast<double>(++counts.bad_log_op)) / ::log(10.0);
296
297 if (0.0 == x - (double) (int) x)
298 debugs(47, DBG_IMPORTANT, "WARNING: " << counts.bad_log_op << " invalid swap log entries found");
299
300 ++counts.invalid;
301
302 return;
303 }
304
305 ++counts.scancount; // XXX: should not this be incremented earlier?
306
307 if (!sd->validFileno(swapData.swap_filen, 0)) {
308 ++counts.invalid;
309 return;
310 }
311
312 if (EBIT_TEST(swapData.flags, KEY_PRIVATE)) {
313 ++counts.badflags;
314 return;
315 }
316
317 /* this needs to become
318 * 1) unpack url
319 * 2) make synthetic request with headers ?? or otherwise search
320 * for a matching object in the store
321 * TODO FIXME change to new async api
322 */
323 currentEntry (Store::Root().get(swapData.key));
324
325 int used; /* is swapfile already in use? */
326
327 used = sd->mapBitTest(swapData.swap_filen);
328
329 /* If this URL already exists in the cache, does the swap log
330 * appear to have a newer entry? Compare 'lastref' from the
331 * swap log to e->lastref. */
332 /* is the log entry newer than current entry? */
333 int disk_entry_newer = currentEntry() ? (swapData.lastref > currentEntry()->lastref ? 1 : 0) : 0;
334
335 if (used && !disk_entry_newer) {
336 /* log entry is old, ignore it */
337 ++counts.clashcount;
338 return;
339 } else if (used && currentEntry() && currentEntry()->swap_filen == swapData.swap_filen && currentEntry()->swap_dirn == sd->index) {
340 /* swapfile taken, same URL, newer, update meta */
341
342 if (currentEntry()->store_status == STORE_OK) {
343 currentEntry()->lastref = swapData.timestamp;
344 currentEntry()->timestamp = swapData.timestamp;
345 currentEntry()->expires = swapData.expires;
346 currentEntry()->lastmod = swapData.lastmod;
347 currentEntry()->flags = swapData.flags;
348 currentEntry()->refcount += swapData.refcount;
349 sd->dereference(*currentEntry());
350 } else {
351 debug_trap("commonUfsDirRebuildFromSwapLog: bad condition");
352 debugs(47, DBG_IMPORTANT, HERE << "bad condition");
353 }
354 return;
355 } else if (used) {
356 /* swapfile in use, not by this URL, log entry is newer */
357 /* This is sorta bad: the log entry should NOT be newer at this
358 * point. If the log is dirty, the filesize check should have
359 * caught this. If the log is clean, there should never be a
360 * newer entry. */
361 debugs(47, DBG_IMPORTANT, "WARNING: newer swaplog entry for dirno " <<
362 sd->index << ", fileno "<< std::setfill('0') << std::hex <<
363 std::uppercase << std::setw(8) << swapData.swap_filen);
364
365 /* I'm tempted to remove the swapfile here just to be safe,
366 * but there is a bad race condition in the NOVM version if
367 * the swapfile has recently been opened for writing, but
368 * not yet opened for reading. Because we can't map
369 * swapfiles back to StoreEntrys, we don't know the state
370 * of the entry using that file. */
371 /* We'll assume the existing entry is valid, probably because
372 * were in a slow rebuild and the the swap file number got taken
373 * and the validation procedure hasn't run. */
374 assert(flags.need_to_validate);
375 ++counts.clashcount;
376 return;
377 } else if (currentEntry() && !disk_entry_newer) {
378 /* key already exists, current entry is newer */
379 /* keep old, ignore new */
380 ++counts.dupcount;
381 return;
382 } else if (currentEntry()) {
383 /* key already exists, this swapfile not being used */
384 /* junk old, load new */
385 undoAdd();
386 --counts.objcount;
387 ++counts.dupcount;
388 } else {
389 /* URL doesnt exist, swapfile not in use */
390 /* load new */
391 (void) 0;
392 }
393
394 ++counts.objcount;
395
396 currentEntry(sd->addDiskRestore(swapData.key,
397 swapData.swap_filen,
398 swapData.swap_file_sz,
399 swapData.expires,
400 swapData.timestamp,
401 swapData.lastref,
402 swapData.lastmod,
403 swapData.refcount,
404 swapData.flags,
405 (int) flags.clean));
406
407 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
408 }
409
410 /// undo the effects of adding an entry in rebuildFromSwapLog()
411 void
412 Fs::Ufs::RebuildState::undoAdd()
413 {
414 StoreEntry *added = currentEntry();
415 assert(added);
416 currentEntry(NULL);
417
418 // TODO: Why bother with these two if we are going to release?!
419 added->expireNow();
420 added->releaseRequest();
421
422 if (added->swap_filen > -1) {
423 UFSSwapDir *sde = dynamic_cast<UFSSwapDir *>(INDEXSD(added->swap_dirn));
424 assert(sde);
425 sde->undoAddDiskRestore(added);
426 }
427
428 added->release();
429 }
430
431 int
432 Fs::Ufs::RebuildState::getNextFile(sfileno * filn_p, int *size)
433 {
434 int fd = -1;
435 int dirs_opened = 0;
436 debugs(47, 3, HERE << "flag=" << flags.init << ", " <<
437 sd->index << ": /"<< std::setfill('0') << std::hex <<
438 std::uppercase << std::setw(2) << curlvl1 << "/" << std::setw(2) <<
439 curlvl2);
440
441 if (done)
442 return -2;
443
444 while (fd < 0 && done == 0) {
445 fd = -1;
446
447 if (0 == flags.init) { /* initialize, open first file */
448 done = 0;
449 curlvl1 = 0;
450 curlvl2 = 0;
451 in_dir = 0;
452 flags.init = 1;
453 assert(Config.cacheSwap.n_configured > 0);
454 }
455
456 if (0 == in_dir) { /* we need to read in a new directory */
457 snprintf(fullpath, MAXPATHLEN, "%s/%02X/%02X",
458 sd->path,
459 curlvl1, curlvl2);
460
461 if (dirs_opened)
462 return -1;
463
464 td = opendir(fullpath);
465
466 ++dirs_opened;
467
468 if (td == NULL) {
469 debugs(47, DBG_IMPORTANT, HERE << "error in opendir (" << fullpath << "): " << xstrerror());
470 } else {
471 entry = readdir(td); /* skip . and .. */
472 entry = readdir(td);
473
474 if (entry == NULL && errno == ENOENT)
475 debugs(47, DBG_IMPORTANT, HERE << "WARNING: directory does not exist!");
476 debugs(47, 3, HERE << "Directory " << fullpath);
477 }
478 }
479
480 if (td != NULL && (entry = readdir(td)) != NULL) {
481 ++in_dir;
482
483 if (sscanf(entry->d_name, "%x", &fn) != 1) {
484 debugs(47, 3, HERE << "invalid entry " << entry->d_name);
485 continue;
486 }
487
488 if (!UFSSwapDir::FilenoBelongsHere(fn, sd->index, curlvl1, curlvl2)) {
489 debugs(47, 3, HERE << std::setfill('0') <<
490 std::hex << std::uppercase << std::setw(8) << fn <<
491 " does not belong in " << std::dec << sd->index << "/" <<
492 curlvl1 << "/" << curlvl2);
493
494 continue;
495 }
496
497 if (sd->mapBitTest(fn)) {
498 debugs(47, 3, HERE << "Locked, continuing with next.");
499 continue;
500 }
501
502 snprintf(fullfilename, MAXPATHLEN, "%s/%s",
503 fullpath, entry->d_name);
504 debugs(47, 3, HERE << "Opening " << fullfilename);
505 fd = file_open(fullfilename, O_RDONLY | O_BINARY);
506
507 if (fd < 0)
508 debugs(47, DBG_IMPORTANT, HERE << "error opening " << fullfilename << ": " << xstrerror());
509 else
510 ++store_open_disk_fd;
511
512 continue;
513 }
514
515 if (td != NULL)
516 closedir(td);
517
518 td = NULL;
519
520 in_dir = 0;
521
522 if (sd->validL2(++curlvl2))
523 continue;
524
525 curlvl2 = 0;
526
527 if (sd->validL1(++curlvl1))
528 continue;
529
530 curlvl1 = 0;
531
532 done = 1;
533 }
534
535 *filn_p = fn;
536 return fd;
537 }
538
539 bool
540 Fs::Ufs::RebuildState::error() const
541 {
542 return false;
543 }
544
545 bool
546 Fs::Ufs::RebuildState::isDone() const
547 {
548 return _done;
549 }
550
551 StoreEntry *
552 Fs::Ufs::RebuildState::currentItem()
553 {
554 return currentEntry();
555 }