]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/ufs/RebuildState.cc
Cleanup: un-wrap C++ header includes
[thirdparty/squid.git] / src / fs / ufs / RebuildState.cc
1 /*
2 * DEBUG: section 47 Store Directory Routines
3 * AUTHOR: Robert Collins
4 *
5 * SQUID Web Proxy Cache http://www.squid-cache.org/
6 * ----------------------------------------------------------
7 *
8 * Squid is the result of efforts by numerous individuals from
9 * the Internet community; see the CONTRIBUTORS file for full
10 * details. Many organizations have provided support for Squid's
11 * development; see the SPONSORS file for full details. Squid is
12 * Copyrighted (C) 2001 by the Regents of the University of
13 * California; see the COPYRIGHT file for full details. Squid
14 * incorporates software developed and/or copyrighted by other
15 * sources; see the CREDITS file for full details.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
30 *
31 */
32
33 #include "squid.h"
34 #include "disk.h"
35 #include "globals.h"
36 #include "RebuildState.h"
37 #include "SquidConfig.h"
38 #include "SquidTime.h"
39 #include "store_key_md5.h"
40 #include "store_rebuild.h"
41 #include "StoreSwapLogData.h"
42 #include "tools.h"
43 #include "UFSSwapLogParser.h"
44
45 #include <cerrno>
46 #include <cmath>
47 #if HAVE_SYS_STAT_H
48 #include <sys/stat.h>
49 #endif
50
51 CBDATA_NAMESPACED_CLASS_INIT(Fs::Ufs,RebuildState);
52
53 Fs::Ufs::RebuildState::RebuildState(RefCount<UFSSwapDir> aSwapDir) :
54 sd (aSwapDir), LogParser(NULL), e(NULL), fromLog(true), _done (false)
55 {
56 /*
57 * If the swap.state file exists in the cache_dir, then
58 * we'll use commonUfsDirRebuildFromSwapLog(), otherwise we'll
59 * use commonUfsDirRebuildFromDirectory() to open up each file
60 * and suck in the meta data.
61 */
62 int clean = 0; //TODO: change to bool
63 int zeroLengthLog = 0;
64 FILE *fp = sd->openTmpSwapLog(&clean, &zeroLengthLog);
65
66 if (fp && !zeroLengthLog)
67 LogParser = Fs::Ufs::UFSSwapLogParser::GetUFSSwapLogParser(fp);
68
69 if (LogParser == NULL ) {
70 fromLog = false;
71
72 if (fp != NULL)
73 fclose(fp);
74
75 } else {
76 fromLog = true;
77 flags.clean = (clean != 0);
78 }
79
80 if (!clean)
81 flags.need_to_validate = true;
82
83 debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path << " (" <<
84 (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << ")");
85 }
86
87 Fs::Ufs::RebuildState::~RebuildState()
88 {
89 sd->closeTmpSwapLog();
90
91 if (LogParser)
92 delete LogParser;
93 }
94
95 void
96 Fs::Ufs::RebuildState::RebuildStep(void *data)
97 {
98 RebuildState *rb = (RebuildState *)data;
99 rb->rebuildStep();
100
101 if (!rb->isDone())
102 eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1);
103 else {
104 -- StoreController::store_dirs_rebuilding;
105 storeRebuildComplete(&rb->counts);
106 delete rb;
107 }
108 }
109
110 /// load entries from swap.state or files until we run out of entries or time
111 void
112 Fs::Ufs::RebuildState::rebuildStep()
113 {
114 currentEntry(NULL);
115
116 // Balance our desire to maximize the number of entries processed at once
117 // (and, hence, minimize overheads and total rebuild time) with a
118 // requirement to also process Coordinator events, disk I/Os, etc.
119 const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
120 const timeval loopStart = current_time;
121
122 const int totalEntries = LogParser ? LogParser->SwapLogEntries() : -1;
123
124 while (!isDone()) {
125 if (fromLog)
126 rebuildFromSwapLog();
127 else
128 rebuildFromDirectory();
129
130 // TODO: teach storeRebuildProgress to handle totalEntries <= 0
131 if (totalEntries > 0 && (n_read % 4000 == 0))
132 storeRebuildProgress(sd->index, totalEntries, n_read);
133
134 if (opt_foreground_rebuild)
135 continue; // skip "few entries at a time" check below
136
137 getCurrentTime();
138 const double elapsedMsec = tvSubMsec(loopStart, current_time);
139 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
140 debugs(47, 5, HERE << "pausing after " << n_read << " entries in " <<
141 elapsedMsec << "ms; " << (elapsedMsec/n_read) << "ms per entry");
142 break;
143 }
144 }
145 }
146
147 /// process one cache file
148 void
149 Fs::Ufs::RebuildState::rebuildFromDirectory()
150 {
151 cache_key key[SQUID_MD5_DIGEST_LENGTH];
152
153 struct stat sb;
154 int fd = -1;
155 assert(this != NULL);
156 debugs(47, 3, HERE << "DIR #" << sd->index);
157
158 assert(fd == -1);
159 sfileno filn = 0;
160 int size;
161 fd = getNextFile(&filn, &size);
162
163 if (fd == -2) {
164 debugs(47, DBG_IMPORTANT, "Done scanning " << sd->path << " dir (" <<
165 n_read << " entries)");
166 _done = true;
167 return;
168 } else if (fd < 0) {
169 return;
170 }
171
172 assert(fd > -1);
173 /* lets get file stats here */
174
175 ++n_read;
176
177 if (fstat(fd, &sb) < 0) {
178 debugs(47, DBG_IMPORTANT, HERE << "fstat(FD " << fd << "): " << xstrerror());
179 file_close(fd);
180 --store_open_disk_fd;
181 fd = -1;
182 return;
183 }
184
185 MemBuf buf;
186 buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);
187 if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
188 return;
189
190 const uint64_t expectedSize = sb.st_size > 0 ?
191 static_cast<uint64_t>(sb.st_size) : 0;
192
193 StoreEntry tmpe;
194 const bool parsed = storeRebuildParseEntry(buf, tmpe, key, counts,
195 expectedSize);
196
197 file_close(fd);
198 --store_open_disk_fd;
199 fd = -1;
200
201 bool accepted = parsed && tmpe.swap_file_sz > 0;
202 if (parsed && !accepted) {
203 debugs(47, DBG_IMPORTANT, "WARNING: Ignoring ufs cache entry with " <<
204 "unknown size: " << tmpe);
205 accepted = false;
206 }
207
208 if (!accepted) {
209 // XXX: shouldn't this be a call to commonUfsUnlink?
210 sd->unlinkFile(filn); // should we unlink in all failure cases?
211 return;
212 }
213
214 if (!storeRebuildKeepEntry(tmpe, key, counts))
215 return;
216
217 ++counts.objcount;
218 // tmpe.dump(5);
219 currentEntry(sd->addDiskRestore(key,
220 filn,
221 tmpe.swap_file_sz,
222 tmpe.expires,
223 tmpe.timestamp,
224 tmpe.lastref,
225 tmpe.lastmod,
226 tmpe.refcount, /* refcount */
227 tmpe.flags, /* flags */
228 (int) flags.clean));
229 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
230 }
231
232 StoreEntry *
233 Fs::Ufs::RebuildState::currentEntry() const
234 {
235 return e;
236 }
237
238 void
239 Fs::Ufs::RebuildState::currentEntry(StoreEntry *newValue)
240 {
241 e = newValue;
242 }
243
244 /// process one swap log entry
245 void
246 Fs::Ufs::RebuildState::rebuildFromSwapLog()
247 {
248 StoreSwapLogData swapData;
249
250 if (LogParser->ReadRecord(swapData) != 1) {
251 debugs(47, DBG_IMPORTANT, "Done reading " << sd->path << " swaplog (" << n_read << " entries)");
252 LogParser->Close();
253 delete LogParser;
254 LogParser = NULL;
255 _done = true;
256 return;
257 }
258
259 ++n_read;
260
261 if (!swapData.sane()) {
262 ++counts.invalid;
263 return;
264 }
265
266 /*
267 * BC: during 2.4 development, we changed the way swap file
268 * numbers are assigned and stored. The high 16 bits used
269 * to encode the SD index number. There used to be a call
270 * to storeDirProperFileno here that re-assigned the index
271 * bits. Now, for backwards compatibility, we just need
272 * to mask it off.
273 */
274 swapData.swap_filen &= 0x00FFFFFF;
275
276 debugs(47, 3, HERE << swap_log_op_str[(int) swapData.op] << " " <<
277 storeKeyText(swapData.key) << " "<< std::setfill('0') <<
278 std::hex << std::uppercase << std::setw(8) <<
279 swapData.swap_filen);
280
281 if (swapData.op == SWAP_LOG_ADD) {
282 (void) 0;
283 } else if (swapData.op == SWAP_LOG_DEL) {
284 /* Delete unless we already have a newer copy anywhere in any store */
285 /* this needs to become
286 * 1) unpack url
287 * 2) make synthetic request with headers ?? or otherwise search
288 * for a matching object in the store
289 * TODO FIXME change to new async api
290 */
291 currentEntry (Store::Root().get(swapData.key));
292
293 if (currentEntry() != NULL && swapData.lastref >= e->lastref) {
294 undoAdd();
295 --counts.objcount;
296 ++counts.cancelcount;
297 }
298 return;
299 } else {
300 const double
301 x = ::log(static_cast<double>(++counts.bad_log_op)) / ::log(10.0);
302
303 if (0.0 == x - (double) (int) x)
304 debugs(47, DBG_IMPORTANT, "WARNING: " << counts.bad_log_op << " invalid swap log entries found");
305
306 ++counts.invalid;
307
308 return;
309 }
310
311 ++counts.scancount; // XXX: should not this be incremented earlier?
312
313 if (!sd->validFileno(swapData.swap_filen, 0)) {
314 ++counts.invalid;
315 return;
316 }
317
318 if (EBIT_TEST(swapData.flags, KEY_PRIVATE)) {
319 ++counts.badflags;
320 return;
321 }
322
323 /* this needs to become
324 * 1) unpack url
325 * 2) make synthetic request with headers ?? or otherwise search
326 * for a matching object in the store
327 * TODO FIXME change to new async api
328 */
329 currentEntry (Store::Root().get(swapData.key));
330
331 int used; /* is swapfile already in use? */
332
333 used = sd->mapBitTest(swapData.swap_filen);
334
335 /* If this URL already exists in the cache, does the swap log
336 * appear to have a newer entry? Compare 'lastref' from the
337 * swap log to e->lastref. */
338 /* is the log entry newer than current entry? */
339 int disk_entry_newer = currentEntry() ? (swapData.lastref > currentEntry()->lastref ? 1 : 0) : 0;
340
341 if (used && !disk_entry_newer) {
342 /* log entry is old, ignore it */
343 ++counts.clashcount;
344 return;
345 } else if (used && currentEntry() && currentEntry()->swap_filen == swapData.swap_filen && currentEntry()->swap_dirn == sd->index) {
346 /* swapfile taken, same URL, newer, update meta */
347
348 if (currentEntry()->store_status == STORE_OK) {
349 currentEntry()->lastref = swapData.timestamp;
350 currentEntry()->timestamp = swapData.timestamp;
351 currentEntry()->expires = swapData.expires;
352 currentEntry()->lastmod = swapData.lastmod;
353 currentEntry()->flags = swapData.flags;
354 currentEntry()->refcount += swapData.refcount;
355 sd->dereference(*currentEntry(), false);
356 } else {
357 debug_trap("commonUfsDirRebuildFromSwapLog: bad condition");
358 debugs(47, DBG_IMPORTANT, HERE << "bad condition");
359 }
360 return;
361 } else if (used) {
362 /* swapfile in use, not by this URL, log entry is newer */
363 /* This is sorta bad: the log entry should NOT be newer at this
364 * point. If the log is dirty, the filesize check should have
365 * caught this. If the log is clean, there should never be a
366 * newer entry. */
367 debugs(47, DBG_IMPORTANT, "WARNING: newer swaplog entry for dirno " <<
368 sd->index << ", fileno "<< std::setfill('0') << std::hex <<
369 std::uppercase << std::setw(8) << swapData.swap_filen);
370
371 /* I'm tempted to remove the swapfile here just to be safe,
372 * but there is a bad race condition in the NOVM version if
373 * the swapfile has recently been opened for writing, but
374 * not yet opened for reading. Because we can't map
375 * swapfiles back to StoreEntrys, we don't know the state
376 * of the entry using that file. */
377 /* We'll assume the existing entry is valid, probably because
378 * were in a slow rebuild and the the swap file number got taken
379 * and the validation procedure hasn't run. */
380 assert(flags.need_to_validate);
381 ++counts.clashcount;
382 return;
383 } else if (currentEntry() && !disk_entry_newer) {
384 /* key already exists, current entry is newer */
385 /* keep old, ignore new */
386 ++counts.dupcount;
387 return;
388 } else if (currentEntry()) {
389 /* key already exists, this swapfile not being used */
390 /* junk old, load new */
391 undoAdd();
392 --counts.objcount;
393 ++counts.dupcount;
394 } else {
395 /* URL doesnt exist, swapfile not in use */
396 /* load new */
397 (void) 0;
398 }
399
400 ++counts.objcount;
401
402 currentEntry(sd->addDiskRestore(swapData.key,
403 swapData.swap_filen,
404 swapData.swap_file_sz,
405 swapData.expires,
406 swapData.timestamp,
407 swapData.lastref,
408 swapData.lastmod,
409 swapData.refcount,
410 swapData.flags,
411 (int) flags.clean));
412
413 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
414 }
415
416 /// undo the effects of adding an entry in rebuildFromSwapLog()
417 void
418 Fs::Ufs::RebuildState::undoAdd()
419 {
420 StoreEntry *added = currentEntry();
421 assert(added);
422 currentEntry(NULL);
423
424 // TODO: Why bother with these two if we are going to release?!
425 added->expireNow();
426 added->releaseRequest();
427
428 if (added->swap_filen > -1) {
429 SwapDir *someDir = INDEXSD(added->swap_dirn);
430 assert(someDir);
431 if (UFSSwapDir *ufsDir = dynamic_cast<UFSSwapDir*>(someDir))
432 ufsDir->undoAddDiskRestore(added);
433 // else the entry was loaded from and/or is currently in a non-UFS dir
434 // Thus, there is no use in preserving its disk file (the only purpose
435 // of undoAddDiskRestore!), even if we could. Instead, we release the
436 // the entry and [eventually] unlink its disk file or free its slot.
437 }
438
439 added->release();
440 }
441
442 int
443 Fs::Ufs::RebuildState::getNextFile(sfileno * filn_p, int *size)
444 {
445 int fd = -1;
446 int dirs_opened = 0;
447 debugs(47, 3, HERE << "flag=" << flags.init << ", " <<
448 sd->index << ": /"<< std::setfill('0') << std::hex <<
449 std::uppercase << std::setw(2) << curlvl1 << "/" << std::setw(2) <<
450 curlvl2);
451
452 if (done)
453 return -2;
454
455 while (fd < 0 && done == 0) {
456 fd = -1;
457
458 if (!flags.init) { /* initialize, open first file */
459 done = 0;
460 curlvl1 = 0;
461 curlvl2 = 0;
462 in_dir = 0;
463 flags.init = true;
464 assert(Config.cacheSwap.n_configured > 0);
465 }
466
467 if (0 == in_dir) { /* we need to read in a new directory */
468 snprintf(fullpath, MAXPATHLEN, "%s/%02X/%02X",
469 sd->path,
470 curlvl1, curlvl2);
471
472 if (dirs_opened)
473 return -1;
474
475 td = opendir(fullpath);
476
477 ++dirs_opened;
478
479 if (td == NULL) {
480 debugs(47, DBG_IMPORTANT, HERE << "error in opendir (" << fullpath << "): " << xstrerror());
481 } else {
482 entry = readdir(td); /* skip . and .. */
483 entry = readdir(td);
484
485 if (entry == NULL && errno == ENOENT)
486 debugs(47, DBG_IMPORTANT, HERE << "WARNING: directory does not exist!");
487 debugs(47, 3, HERE << "Directory " << fullpath);
488 }
489 }
490
491 if (td != NULL && (entry = readdir(td)) != NULL) {
492 ++in_dir;
493
494 if (sscanf(entry->d_name, "%x", &fn) != 1) {
495 debugs(47, 3, HERE << "invalid entry " << entry->d_name);
496 continue;
497 }
498
499 if (!UFSSwapDir::FilenoBelongsHere(fn, sd->index, curlvl1, curlvl2)) {
500 debugs(47, 3, HERE << std::setfill('0') <<
501 std::hex << std::uppercase << std::setw(8) << fn <<
502 " does not belong in " << std::dec << sd->index << "/" <<
503 curlvl1 << "/" << curlvl2);
504
505 continue;
506 }
507
508 if (sd->mapBitTest(fn)) {
509 debugs(47, 3, HERE << "Locked, continuing with next.");
510 continue;
511 }
512
513 snprintf(fullfilename, MAXPATHLEN, "%s/%s",
514 fullpath, entry->d_name);
515 debugs(47, 3, HERE << "Opening " << fullfilename);
516 fd = file_open(fullfilename, O_RDONLY | O_BINARY);
517
518 if (fd < 0)
519 debugs(47, DBG_IMPORTANT, HERE << "error opening " << fullfilename << ": " << xstrerror());
520 else
521 ++store_open_disk_fd;
522
523 continue;
524 }
525
526 if (td != NULL)
527 closedir(td);
528
529 td = NULL;
530
531 in_dir = 0;
532
533 if (sd->validL2(++curlvl2))
534 continue;
535
536 curlvl2 = 0;
537
538 if (sd->validL1(++curlvl1))
539 continue;
540
541 curlvl1 = 0;
542
543 done = 1;
544 }
545
546 *filn_p = fn;
547 return fd;
548 }
549
550 bool
551 Fs::Ufs::RebuildState::error() const
552 {
553 return false;
554 }
555
556 bool
557 Fs::Ufs::RebuildState::isDone() const
558 {
559 return _done;
560 }
561
562 StoreEntry *
563 Fs::Ufs::RebuildState::currentItem()
564 {
565 return currentEntry();
566 }