]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/ufs/RebuildState.cc
Merged from trunk
[thirdparty/squid.git] / src / fs / ufs / RebuildState.cc
1 /*
2 * DEBUG: section 47 Store Directory Routines
3 * AUTHOR: Robert Collins
4 *
5 * SQUID Web Proxy Cache http://www.squid-cache.org/
6 * ----------------------------------------------------------
7 *
8 * Squid is the result of efforts by numerous individuals from
9 * the Internet community; see the CONTRIBUTORS file for full
10 * details. Many organizations have provided support for Squid's
11 * development; see the SPONSORS file for full details. Squid is
12 * Copyrighted (C) 2001 by the Regents of the University of
13 * California; see the COPYRIGHT file for full details. Squid
14 * incorporates software developed and/or copyrighted by other
15 * sources; see the CREDITS file for full details.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
30 *
31 */
32
33 #include "squid.h"
34 #include "disk.h"
35 #include "globals.h"
36 #include "RebuildState.h"
37 #include "SquidTime.h"
38 #include "store_key_md5.h"
39 #include "store_rebuild.h"
40 #include "StoreSwapLogData.h"
41 #include "tools.h"
42 #include "UFSSwapLogParser.h"
43
44 #if HAVE_MATH_H
45 #include <math.h>
46 #endif
47 #if HAVE_SYS_STAT_H
48 #include <sys/stat.h>
49 #endif
50 #if HAVE_ERRNO_H
51 #include <errno.h>
52 #endif
53
54 CBDATA_NAMESPACED_CLASS_INIT(Fs::Ufs,RebuildState);
55
56 Fs::Ufs::RebuildState::RebuildState(RefCount<UFSSwapDir> aSwapDir) :
57 sd (aSwapDir), LogParser(NULL), e(NULL), fromLog(true), _done (false)
58 {
59 /*
60 * If the swap.state file exists in the cache_dir, then
61 * we'll use commonUfsDirRebuildFromSwapLog(), otherwise we'll
62 * use commonUfsDirRebuildFromDirectory() to open up each file
63 * and suck in the meta data.
64 */
65 int clean = 0;
66 int zeroLengthLog = 0;
67 FILE *fp = sd->openTmpSwapLog(&clean, &zeroLengthLog);
68
69 if (fp && !zeroLengthLog)
70 LogParser = Fs::Ufs::UFSSwapLogParser::GetUFSSwapLogParser(fp);
71
72 if (LogParser == NULL ) {
73 fromLog = false;
74
75 if (fp != NULL)
76 fclose(fp);
77
78 } else {
79 fromLog = true;
80 flags.clean = (unsigned int) clean;
81 }
82
83 if (!clean)
84 flags.need_to_validate = 1;
85
86 debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path << " (" <<
87 (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << ")");
88 }
89
90 Fs::Ufs::RebuildState::~RebuildState()
91 {
92 sd->closeTmpSwapLog();
93
94 if (LogParser)
95 delete LogParser;
96 }
97
98 void
99 Fs::Ufs::RebuildState::RebuildStep(void *data)
100 {
101 RebuildState *rb = (RebuildState *)data;
102 rb->rebuildStep();
103
104 if (!rb->isDone())
105 eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1);
106 else {
107 -- StoreController::store_dirs_rebuilding;
108 storeRebuildComplete(&rb->counts);
109 delete rb;
110 }
111 }
112
113 /// load entries from swap.state or files until we run out of entries or time
114 void
115 Fs::Ufs::RebuildState::rebuildStep()
116 {
117 currentEntry(NULL);
118
119 // Balance our desire to maximize the number of entries processed at once
120 // (and, hence, minimize overheads and total rebuild time) with a
121 // requirement to also process Coordinator events, disk I/Os, etc.
122 const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
123 const timeval loopStart = current_time;
124
125 const int totalEntries = LogParser ? LogParser->SwapLogEntries() : -1;
126
127 while (!isDone()) {
128 if (fromLog)
129 rebuildFromSwapLog();
130 else
131 rebuildFromDirectory();
132
133 // TODO: teach storeRebuildProgress to handle totalEntries <= 0
134 if (totalEntries > 0 && (n_read % 4000 == 0))
135 storeRebuildProgress(sd->index, totalEntries, n_read);
136
137 if (opt_foreground_rebuild)
138 continue; // skip "few entries at a time" check below
139
140 getCurrentTime();
141 const double elapsedMsec = tvSubMsec(loopStart, current_time);
142 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
143 debugs(47, 5, HERE << "pausing after " << n_read << " entries in " <<
144 elapsedMsec << "ms; " << (elapsedMsec/n_read) << "ms per entry");
145 break;
146 }
147 }
148 }
149
150 /// process one cache file
151 void
152 Fs::Ufs::RebuildState::rebuildFromDirectory()
153 {
154 cache_key key[SQUID_MD5_DIGEST_LENGTH];
155
156 struct stat sb;
157 int fd = -1;
158 assert(this != NULL);
159 debugs(47, 3, HERE << "DIR #" << sd->index);
160
161 assert(fd == -1);
162 sfileno filn = 0;
163 int size;
164 fd = getNextFile(&filn, &size);
165
166 if (fd == -2) {
167 debugs(47, DBG_IMPORTANT, "Done scanning " << sd->path << " dir (" <<
168 n_read << " entries)");
169 _done = true;
170 return;
171 } else if (fd < 0) {
172 return;
173 }
174
175 assert(fd > -1);
176 /* lets get file stats here */
177
178 ++n_read;
179
180 if (fstat(fd, &sb) < 0) {
181 debugs(47, DBG_IMPORTANT, HERE << "fstat(FD " << fd << "): " << xstrerror());
182 file_close(fd);
183 --store_open_disk_fd;
184 fd = -1;
185 return;
186 }
187
188 MemBuf buf;
189 buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);
190 if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
191 return;
192
193 StoreEntry tmpe;
194 const bool loaded = storeRebuildParseEntry(buf, tmpe, key, counts,
195 (int64_t)sb.st_size);
196
197 file_close(fd);
198 --store_open_disk_fd;
199 fd = -1;
200
201 if (!loaded) {
202 // XXX: shouldn't this be a call to commonUfsUnlink?
203 sd->unlinkFile(filn); // should we unlink in all failure cases?
204 return;
205 }
206
207 if (!storeRebuildKeepEntry(tmpe, key, counts))
208 return;
209
210 ++counts.objcount;
211 // tmpe.dump(5);
212 currentEntry(sd->addDiskRestore(key,
213 filn,
214 tmpe.swap_file_sz,
215 tmpe.expires,
216 tmpe.timestamp,
217 tmpe.lastref,
218 tmpe.lastmod,
219 tmpe.refcount, /* refcount */
220 tmpe.flags, /* flags */
221 (int) flags.clean));
222 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
223 }
224
225 StoreEntry *
226 Fs::Ufs::RebuildState::currentEntry() const
227 {
228 return e;
229 }
230
231 void
232 Fs::Ufs::RebuildState::currentEntry(StoreEntry *newValue)
233 {
234 e = newValue;
235 }
236
237 /// process one swap log entry
238 void
239 Fs::Ufs::RebuildState::rebuildFromSwapLog()
240 {
241 StoreSwapLogData swapData;
242
243 if (LogParser->ReadRecord(swapData) != 1) {
244 debugs(47, DBG_IMPORTANT, "Done reading " << sd->path << " swaplog (" << n_read << " entries)");
245 LogParser->Close();
246 delete LogParser;
247 LogParser = NULL;
248 _done = true;
249 return;
250 }
251
252 ++n_read;
253
254 if (!swapData.sane()) {
255 ++counts.invalid;
256 return;
257 }
258
259 /*
260 * BC: during 2.4 development, we changed the way swap file
261 * numbers are assigned and stored. The high 16 bits used
262 * to encode the SD index number. There used to be a call
263 * to storeDirProperFileno here that re-assigned the index
264 * bits. Now, for backwards compatibility, we just need
265 * to mask it off.
266 */
267 swapData.swap_filen &= 0x00FFFFFF;
268
269 debugs(47, 3, HERE << swap_log_op_str[(int) swapData.op] << " " <<
270 storeKeyText(swapData.key) << " "<< std::setfill('0') <<
271 std::hex << std::uppercase << std::setw(8) <<
272 swapData.swap_filen);
273
274 if (swapData.op == SWAP_LOG_ADD) {
275 (void) 0;
276 } else if (swapData.op == SWAP_LOG_DEL) {
277 /* Delete unless we already have a newer copy anywhere in any store */
278 /* this needs to become
279 * 1) unpack url
280 * 2) make synthetic request with headers ?? or otherwise search
281 * for a matching object in the store
282 * TODO FIXME change to new async api
283 */
284 currentEntry (Store::Root().get(swapData.key));
285
286 if (currentEntry() != NULL && swapData.lastref >= e->lastref) {
287 undoAdd();
288 --counts.objcount;
289 ++counts.cancelcount;
290 }
291 return;
292 } else {
293 const double
294 x = ::log(static_cast<double>(++counts.bad_log_op)) / ::log(10.0);
295
296 if (0.0 == x - (double) (int) x)
297 debugs(47, DBG_IMPORTANT, "WARNING: " << counts.bad_log_op << " invalid swap log entries found");
298
299 ++counts.invalid;
300
301 return;
302 }
303
304 ++counts.scancount; // XXX: should not this be incremented earlier?
305
306 if (!sd->validFileno(swapData.swap_filen, 0)) {
307 ++counts.invalid;
308 return;
309 }
310
311 if (EBIT_TEST(swapData.flags, KEY_PRIVATE)) {
312 ++counts.badflags;
313 return;
314 }
315
316 /* this needs to become
317 * 1) unpack url
318 * 2) make synthetic request with headers ?? or otherwise search
319 * for a matching object in the store
320 * TODO FIXME change to new async api
321 */
322 currentEntry (Store::Root().get(swapData.key));
323
324 int used; /* is swapfile already in use? */
325
326 used = sd->mapBitTest(swapData.swap_filen);
327
328 /* If this URL already exists in the cache, does the swap log
329 * appear to have a newer entry? Compare 'lastref' from the
330 * swap log to e->lastref. */
331 /* is the log entry newer than current entry? */
332 int disk_entry_newer = currentEntry() ? (swapData.lastref > currentEntry()->lastref ? 1 : 0) : 0;
333
334 if (used && !disk_entry_newer) {
335 /* log entry is old, ignore it */
336 ++counts.clashcount;
337 return;
338 } else if (used && currentEntry() && currentEntry()->swap_filen == swapData.swap_filen && currentEntry()->swap_dirn == sd->index) {
339 /* swapfile taken, same URL, newer, update meta */
340
341 if (currentEntry()->store_status == STORE_OK) {
342 currentEntry()->lastref = swapData.timestamp;
343 currentEntry()->timestamp = swapData.timestamp;
344 currentEntry()->expires = swapData.expires;
345 currentEntry()->lastmod = swapData.lastmod;
346 currentEntry()->flags = swapData.flags;
347 currentEntry()->refcount += swapData.refcount;
348 sd->dereference(*currentEntry());
349 } else {
350 debug_trap("commonUfsDirRebuildFromSwapLog: bad condition");
351 debugs(47, DBG_IMPORTANT, HERE << "bad condition");
352 }
353 return;
354 } else if (used) {
355 /* swapfile in use, not by this URL, log entry is newer */
356 /* This is sorta bad: the log entry should NOT be newer at this
357 * point. If the log is dirty, the filesize check should have
358 * caught this. If the log is clean, there should never be a
359 * newer entry. */
360 debugs(47, DBG_IMPORTANT, "WARNING: newer swaplog entry for dirno " <<
361 sd->index << ", fileno "<< std::setfill('0') << std::hex <<
362 std::uppercase << std::setw(8) << swapData.swap_filen);
363
364 /* I'm tempted to remove the swapfile here just to be safe,
365 * but there is a bad race condition in the NOVM version if
366 * the swapfile has recently been opened for writing, but
367 * not yet opened for reading. Because we can't map
368 * swapfiles back to StoreEntrys, we don't know the state
369 * of the entry using that file. */
370 /* We'll assume the existing entry is valid, probably because
371 * were in a slow rebuild and the the swap file number got taken
372 * and the validation procedure hasn't run. */
373 assert(flags.need_to_validate);
374 ++counts.clashcount;
375 return;
376 } else if (currentEntry() && !disk_entry_newer) {
377 /* key already exists, current entry is newer */
378 /* keep old, ignore new */
379 ++counts.dupcount;
380 return;
381 } else if (currentEntry()) {
382 /* key already exists, this swapfile not being used */
383 /* junk old, load new */
384 undoAdd();
385 --counts.objcount;
386 ++counts.dupcount;
387 } else {
388 /* URL doesnt exist, swapfile not in use */
389 /* load new */
390 (void) 0;
391 }
392
393 ++counts.objcount;
394
395 currentEntry(sd->addDiskRestore(swapData.key,
396 swapData.swap_filen,
397 swapData.swap_file_sz,
398 swapData.expires,
399 swapData.timestamp,
400 swapData.lastref,
401 swapData.lastmod,
402 swapData.refcount,
403 swapData.flags,
404 (int) flags.clean));
405
406 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
407 }
408
409 /// undo the effects of adding an entry in rebuildFromSwapLog()
410 void
411 Fs::Ufs::RebuildState::undoAdd()
412 {
413 StoreEntry *added = currentEntry();
414 assert(added);
415 currentEntry(NULL);
416
417 // TODO: Why bother with these two if we are going to release?!
418 added->expireNow();
419 added->releaseRequest();
420
421 if (added->swap_filen > -1) {
422 UFSSwapDir *sde = dynamic_cast<UFSSwapDir *>(INDEXSD(added->swap_dirn));
423 assert(sde);
424 sde->undoAddDiskRestore(added);
425 }
426
427 added->release();
428 }
429
430 int
431 Fs::Ufs::RebuildState::getNextFile(sfileno * filn_p, int *size)
432 {
433 int fd = -1;
434 int dirs_opened = 0;
435 debugs(47, 3, HERE << "flag=" << flags.init << ", " <<
436 sd->index << ": /"<< std::setfill('0') << std::hex <<
437 std::uppercase << std::setw(2) << curlvl1 << "/" << std::setw(2) <<
438 curlvl2);
439
440 if (done)
441 return -2;
442
443 while (fd < 0 && done == 0) {
444 fd = -1;
445
446 if (0 == flags.init) { /* initialize, open first file */
447 done = 0;
448 curlvl1 = 0;
449 curlvl2 = 0;
450 in_dir = 0;
451 flags.init = 1;
452 assert(Config.cacheSwap.n_configured > 0);
453 }
454
455 if (0 == in_dir) { /* we need to read in a new directory */
456 snprintf(fullpath, MAXPATHLEN, "%s/%02X/%02X",
457 sd->path,
458 curlvl1, curlvl2);
459
460 if (dirs_opened)
461 return -1;
462
463 td = opendir(fullpath);
464
465 ++dirs_opened;
466
467 if (td == NULL) {
468 debugs(47, DBG_IMPORTANT, HERE << "error in opendir (" << fullpath << "): " << xstrerror());
469 } else {
470 entry = readdir(td); /* skip . and .. */
471 entry = readdir(td);
472
473 if (entry == NULL && errno == ENOENT)
474 debugs(47, DBG_IMPORTANT, HERE << "WARNING: directory does not exist!");
475 debugs(47, 3, HERE << "Directory " << fullpath);
476 }
477 }
478
479 if (td != NULL && (entry = readdir(td)) != NULL) {
480 ++in_dir;
481
482 if (sscanf(entry->d_name, "%x", &fn) != 1) {
483 debugs(47, 3, HERE << "invalid entry " << entry->d_name);
484 continue;
485 }
486
487 if (!UFSSwapDir::FilenoBelongsHere(fn, sd->index, curlvl1, curlvl2)) {
488 debugs(47, 3, HERE << std::setfill('0') <<
489 std::hex << std::uppercase << std::setw(8) << fn <<
490 " does not belong in " << std::dec << sd->index << "/" <<
491 curlvl1 << "/" << curlvl2);
492
493 continue;
494 }
495
496 if (sd->mapBitTest(fn)) {
497 debugs(47, 3, HERE << "Locked, continuing with next.");
498 continue;
499 }
500
501 snprintf(fullfilename, MAXPATHLEN, "%s/%s",
502 fullpath, entry->d_name);
503 debugs(47, 3, HERE << "Opening " << fullfilename);
504 fd = file_open(fullfilename, O_RDONLY | O_BINARY);
505
506 if (fd < 0)
507 debugs(47, DBG_IMPORTANT, HERE << "error opening " << fullfilename << ": " << xstrerror());
508 else
509 ++store_open_disk_fd;
510
511 continue;
512 }
513
514 if (td != NULL)
515 closedir(td);
516
517 td = NULL;
518
519 in_dir = 0;
520
521 if (sd->validL2(++curlvl2))
522 continue;
523
524 curlvl2 = 0;
525
526 if (sd->validL1(++curlvl1))
527 continue;
528
529 curlvl1 = 0;
530
531 done = 1;
532 }
533
534 *filn_p = fn;
535 return fd;
536 }
537
538 bool
539 Fs::Ufs::RebuildState::error() const
540 {
541 return false;
542 }
543
544 bool
545 Fs::Ufs::RebuildState::isDone() const
546 {
547 return _done;
548 }
549
550 StoreEntry *
551 Fs::Ufs::RebuildState::currentItem()
552 {
553 return currentEntry();
554 }