]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/ufs/RebuildState.cc
Merged from trunk
[thirdparty/squid.git] / src / fs / ufs / RebuildState.cc
1 /*
2 *
3 * DEBUG: section 47 Store Directory Routines
4 * AUTHOR: Robert Collins
5 *
6 * SQUID Web Proxy Cache http://www.squid-cache.org/
7 * ----------------------------------------------------------
8 *
9 * Squid is the result of efforts by numerous individuals from
10 * the Internet community; see the CONTRIBUTORS file for full
11 * details. Many organizations have provided support for Squid's
12 * development; see the SPONSORS file for full details. Squid is
13 * Copyrighted (C) 2001 by the Regents of the University of
14 * California; see the COPYRIGHT file for full details. Squid
15 * incorporates software developed and/or copyrighted by other
16 * sources; see the CREDITS file for full details.
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
31 *
32 */
33
34 #include "squid.h"
35 #include "disk.h"
36 #include "protos.h"
37 #include "RebuildState.h"
38 #include "SquidTime.h"
39 #include "store_key_md5.h"
40 #include "StoreSwapLogData.h"
41 #include "UFSSwapLogParser.h"
42
43 #if HAVE_MATH_H
44 #include <math.h>
45 #endif
46 #if HAVE_SYS_STAT_H
47 #include <sys/stat.h>
48 #endif
49 #if HAVE_ERRNO_H
50 #include <errno.h>
51 #endif
52
53 CBDATA_NAMESPACED_CLASS_INIT(Fs::Ufs,RebuildState);
54
55 Fs::Ufs::RebuildState::RebuildState(RefCount<UFSSwapDir> aSwapDir) :
56 sd (aSwapDir), LogParser(NULL), e(NULL), fromLog(true), _done (false)
57 {
58 /*
59 * If the swap.state file exists in the cache_dir, then
60 * we'll use commonUfsDirRebuildFromSwapLog(), otherwise we'll
61 * use commonUfsDirRebuildFromDirectory() to open up each file
62 * and suck in the meta data.
63 */
64 int clean = 0;
65 int zeroLengthLog = 0;
66 FILE *fp = sd->openTmpSwapLog(&clean, &zeroLengthLog);
67
68 if (fp && !zeroLengthLog)
69 LogParser = Fs::Ufs::UFSSwapLogParser::GetUFSSwapLogParser(fp);
70
71 if (LogParser == NULL ) {
72 fromLog = false;
73
74 if (fp != NULL)
75 fclose(fp);
76
77 } else {
78 fromLog = true;
79 flags.clean = (unsigned int) clean;
80 }
81
82 if (!clean)
83 flags.need_to_validate = 1;
84
85 debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path << " (" <<
86 (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << ")");
87 }
88
89 Fs::Ufs::RebuildState::~RebuildState()
90 {
91 sd->closeTmpSwapLog();
92
93 if (LogParser)
94 delete LogParser;
95 }
96
97 void
98 Fs::Ufs::RebuildState::RebuildStep(void *data)
99 {
100 RebuildState *rb = (RebuildState *)data;
101 rb->rebuildStep();
102
103 if (!rb->isDone())
104 eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1);
105 else {
106 -- StoreController::store_dirs_rebuilding;
107 storeRebuildComplete(&rb->counts);
108 delete rb;
109 }
110 }
111
112 /// load entries from swap.state or files until we run out of entries or time
113 void
114 Fs::Ufs::RebuildState::rebuildStep()
115 {
116 currentEntry(NULL);
117
118 // Balance our desire to maximize the number of entries processed at once
119 // (and, hence, minimize overheads and total rebuild time) with a
120 // requirement to also process Coordinator events, disk I/Os, etc.
121 const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
122 const timeval loopStart = current_time;
123
124 const int totalEntries = LogParser ? LogParser->SwapLogEntries() : -1;
125
126 while (!isDone()) {
127 if (fromLog)
128 rebuildFromSwapLog();
129 else
130 rebuildFromDirectory();
131
132 // TODO: teach storeRebuildProgress to handle totalEntries <= 0
133 if (totalEntries > 0 && (n_read % 4000 == 0))
134 storeRebuildProgress(sd->index, totalEntries, n_read);
135
136 if (opt_foreground_rebuild)
137 continue; // skip "few entries at a time" check below
138
139 getCurrentTime();
140 const double elapsedMsec = tvSubMsec(loopStart, current_time);
141 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
142 debugs(47, 5, HERE << "pausing after " << n_read << " entries in " <<
143 elapsedMsec << "ms; " << (elapsedMsec/n_read) << "ms per entry");
144 break;
145 }
146 }
147 }
148
149 /// process one cache file
150 void
151 Fs::Ufs::RebuildState::rebuildFromDirectory()
152 {
153 cache_key key[SQUID_MD5_DIGEST_LENGTH];
154
155 struct stat sb;
156 int fd = -1;
157 assert(this != NULL);
158 debugs(47, 3, HERE << "DIR #" << sd->index);
159
160 assert(fd == -1);
161 sfileno filn = 0;
162 int size;
163 fd = getNextFile(&filn, &size);
164
165 if (fd == -2) {
166 debugs(47, DBG_IMPORTANT, "Done scanning " << sd->path << " dir (" <<
167 n_read << " entries)");
168 _done = true;
169 return;
170 } else if (fd < 0) {
171 return;
172 }
173
174 assert(fd > -1);
175 /* lets get file stats here */
176
177 ++n_read;
178
179 if (fstat(fd, &sb) < 0) {
180 debugs(47, DBG_IMPORTANT, HERE << "fstat(FD " << fd << "): " << xstrerror());
181 file_close(fd);
182 --store_open_disk_fd;
183 fd = -1;
184 return;
185 }
186
187 MemBuf buf;
188 buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);
189 if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
190 return;
191
192 StoreEntry tmpe;
193 const bool loaded = storeRebuildParseEntry(buf, tmpe, key, counts,
194 (int64_t)sb.st_size);
195
196 file_close(fd);
197 --store_open_disk_fd;
198 fd = -1;
199
200 if (!loaded) {
201 // XXX: shouldn't this be a call to commonUfsUnlink?
202 sd->unlinkFile(filn); // should we unlink in all failure cases?
203 return;
204 }
205
206 if (!storeRebuildKeepEntry(tmpe, key, counts))
207 return;
208
209 ++counts.objcount;
210 // tmpe.dump(5);
211 currentEntry(sd->addDiskRestore(key,
212 filn,
213 tmpe.swap_file_sz,
214 tmpe.expires,
215 tmpe.timestamp,
216 tmpe.lastref,
217 tmpe.lastmod,
218 tmpe.refcount, /* refcount */
219 tmpe.flags, /* flags */
220 (int) flags.clean));
221 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
222 }
223
224 StoreEntry *
225 Fs::Ufs::RebuildState::currentEntry() const
226 {
227 return e;
228 }
229
230 void
231 Fs::Ufs::RebuildState::currentEntry(StoreEntry *newValue)
232 {
233 e = newValue;
234 }
235
236 /// process one swap log entry
237 void
238 Fs::Ufs::RebuildState::rebuildFromSwapLog()
239 {
240 StoreSwapLogData swapData;
241
242 if (LogParser->ReadRecord(swapData) != 1) {
243 debugs(47, DBG_IMPORTANT, "Done reading " << sd->path << " swaplog (" << n_read << " entries)");
244 LogParser->Close();
245 delete LogParser;
246 LogParser = NULL;
247 _done = true;
248 return;
249 }
250
251 ++n_read;
252
253 if (!swapData.sane()) {
254 ++counts.invalid;
255 return;
256 }
257
258 /*
259 * BC: during 2.4 development, we changed the way swap file
260 * numbers are assigned and stored. The high 16 bits used
261 * to encode the SD index number. There used to be a call
262 * to storeDirProperFileno here that re-assigned the index
263 * bits. Now, for backwards compatibility, we just need
264 * to mask it off.
265 */
266 swapData.swap_filen &= 0x00FFFFFF;
267
268 debugs(47, 3, HERE << swap_log_op_str[(int) swapData.op] << " " <<
269 storeKeyText(swapData.key) << " "<< std::setfill('0') <<
270 std::hex << std::uppercase << std::setw(8) <<
271 swapData.swap_filen);
272
273 if (swapData.op == SWAP_LOG_ADD) {
274 (void) 0;
275 } else if (swapData.op == SWAP_LOG_DEL) {
276 /* Delete unless we already have a newer copy anywhere in any store */
277 /* this needs to become
278 * 1) unpack url
279 * 2) make synthetic request with headers ?? or otherwise search
280 * for a matching object in the store
281 * TODO FIXME change to new async api
282 */
283 currentEntry (Store::Root().get(swapData.key));
284
285 if (currentEntry() != NULL && swapData.lastref >= e->lastref) {
286 undoAdd();
287 --counts.objcount;
288 ++counts.cancelcount;
289 }
290 return;
291 } else {
292 const double
293 x = ::log(static_cast<double>(++counts.bad_log_op)) / ::log(10.0);
294
295 if (0.0 == x - (double) (int) x)
296 debugs(47, DBG_IMPORTANT, "WARNING: " << counts.bad_log_op << " invalid swap log entries found");
297
298 ++counts.invalid;
299
300 return;
301 }
302
303 ++counts.scancount; // XXX: should not this be incremented earlier?
304
305 if (!sd->validFileno(swapData.swap_filen, 0)) {
306 ++counts.invalid;
307 return;
308 }
309
310 if (EBIT_TEST(swapData.flags, KEY_PRIVATE)) {
311 ++counts.badflags;
312 return;
313 }
314
315 /* this needs to become
316 * 1) unpack url
317 * 2) make synthetic request with headers ?? or otherwise search
318 * for a matching object in the store
319 * TODO FIXME change to new async api
320 */
321 currentEntry (Store::Root().get(swapData.key));
322
323 int used; /* is swapfile already in use? */
324
325 used = sd->mapBitTest(swapData.swap_filen);
326
327 /* If this URL already exists in the cache, does the swap log
328 * appear to have a newer entry? Compare 'lastref' from the
329 * swap log to e->lastref. */
330 /* is the log entry newer than current entry? */
331 int disk_entry_newer = currentEntry() ? (swapData.lastref > currentEntry()->lastref ? 1 : 0) : 0;
332
333 if (used && !disk_entry_newer) {
334 /* log entry is old, ignore it */
335 ++counts.clashcount;
336 return;
337 } else if (used && currentEntry() && currentEntry()->swap_filen == swapData.swap_filen && currentEntry()->swap_dirn == sd->index) {
338 /* swapfile taken, same URL, newer, update meta */
339
340 if (currentEntry()->store_status == STORE_OK) {
341 currentEntry()->lastref = swapData.timestamp;
342 currentEntry()->timestamp = swapData.timestamp;
343 currentEntry()->expires = swapData.expires;
344 currentEntry()->lastmod = swapData.lastmod;
345 currentEntry()->flags = swapData.flags;
346 currentEntry()->refcount += swapData.refcount;
347 sd->dereference(*currentEntry());
348 } else {
349 debug_trap("commonUfsDirRebuildFromSwapLog: bad condition");
350 debugs(47, DBG_IMPORTANT, HERE << "bad condition");
351 }
352 return;
353 } else if (used) {
354 /* swapfile in use, not by this URL, log entry is newer */
355 /* This is sorta bad: the log entry should NOT be newer at this
356 * point. If the log is dirty, the filesize check should have
357 * caught this. If the log is clean, there should never be a
358 * newer entry. */
359 debugs(47, DBG_IMPORTANT, "WARNING: newer swaplog entry for dirno " <<
360 sd->index << ", fileno "<< std::setfill('0') << std::hex <<
361 std::uppercase << std::setw(8) << swapData.swap_filen);
362
363 /* I'm tempted to remove the swapfile here just to be safe,
364 * but there is a bad race condition in the NOVM version if
365 * the swapfile has recently been opened for writing, but
366 * not yet opened for reading. Because we can't map
367 * swapfiles back to StoreEntrys, we don't know the state
368 * of the entry using that file. */
369 /* We'll assume the existing entry is valid, probably because
370 * were in a slow rebuild and the the swap file number got taken
371 * and the validation procedure hasn't run. */
372 assert(flags.need_to_validate);
373 ++counts.clashcount;
374 return;
375 } else if (currentEntry() && !disk_entry_newer) {
376 /* key already exists, current entry is newer */
377 /* keep old, ignore new */
378 ++counts.dupcount;
379 return;
380 } else if (currentEntry()) {
381 /* key already exists, this swapfile not being used */
382 /* junk old, load new */
383 undoAdd();
384 --counts.objcount;
385 ++counts.dupcount;
386 } else {
387 /* URL doesnt exist, swapfile not in use */
388 /* load new */
389 (void) 0;
390 }
391
392 ++counts.objcount;
393
394 currentEntry(sd->addDiskRestore(swapData.key,
395 swapData.swap_filen,
396 swapData.swap_file_sz,
397 swapData.expires,
398 swapData.timestamp,
399 swapData.lastref,
400 swapData.lastmod,
401 swapData.refcount,
402 swapData.flags,
403 (int) flags.clean));
404
405 storeDirSwapLog(currentEntry(), SWAP_LOG_ADD);
406 }
407
408 /// undo the effects of adding an entry in rebuildFromSwapLog()
409 void
410 Fs::Ufs::RebuildState::undoAdd()
411 {
412 StoreEntry *added = currentEntry();
413 assert(added);
414 currentEntry(NULL);
415
416 // TODO: Why bother with these two if we are going to release?!
417 added->expireNow();
418 added->releaseRequest();
419
420 if (added->swap_filen > -1) {
421 UFSSwapDir *sde = dynamic_cast<UFSSwapDir *>(INDEXSD(added->swap_dirn));
422 assert(sde);
423 sde->undoAddDiskRestore(added);
424 }
425
426 added->release();
427 }
428
429 int
430 Fs::Ufs::RebuildState::getNextFile(sfileno * filn_p, int *size)
431 {
432 int fd = -1;
433 int dirs_opened = 0;
434 debugs(47, 3, HERE << "flag=" << flags.init << ", " <<
435 sd->index << ": /"<< std::setfill('0') << std::hex <<
436 std::uppercase << std::setw(2) << curlvl1 << "/" << std::setw(2) <<
437 curlvl2);
438
439 if (done)
440 return -2;
441
442 while (fd < 0 && done == 0) {
443 fd = -1;
444
445 if (0 == flags.init) { /* initialize, open first file */
446 done = 0;
447 curlvl1 = 0;
448 curlvl2 = 0;
449 in_dir = 0;
450 flags.init = 1;
451 assert(Config.cacheSwap.n_configured > 0);
452 }
453
454 if (0 == in_dir) { /* we need to read in a new directory */
455 snprintf(fullpath, MAXPATHLEN, "%s/%02X/%02X",
456 sd->path,
457 curlvl1, curlvl2);
458
459 if (dirs_opened)
460 return -1;
461
462 td = opendir(fullpath);
463
464 ++dirs_opened;
465
466 if (td == NULL) {
467 debugs(47, DBG_IMPORTANT, HERE << "error in opendir (" << fullpath << "): " << xstrerror());
468 } else {
469 entry = readdir(td); /* skip . and .. */
470 entry = readdir(td);
471
472 if (entry == NULL && errno == ENOENT)
473 debugs(47, DBG_IMPORTANT, HERE << "WARNING: directory does not exist!");
474 debugs(47, 3, HERE << "Directory " << fullpath);
475 }
476 }
477
478 if (td != NULL && (entry = readdir(td)) != NULL) {
479 ++in_dir;
480
481 if (sscanf(entry->d_name, "%x", &fn) != 1) {
482 debugs(47, 3, HERE << "invalid entry " << entry->d_name);
483 continue;
484 }
485
486 if (!UFSSwapDir::FilenoBelongsHere(fn, sd->index, curlvl1, curlvl2)) {
487 debugs(47, 3, HERE << std::setfill('0') <<
488 std::hex << std::uppercase << std::setw(8) << fn <<
489 " does not belong in " << std::dec << sd->index << "/" <<
490 curlvl1 << "/" << curlvl2);
491
492 continue;
493 }
494
495 if (sd->mapBitTest(fn)) {
496 debugs(47, 3, HERE << "Locked, continuing with next.");
497 continue;
498 }
499
500 snprintf(fullfilename, MAXPATHLEN, "%s/%s",
501 fullpath, entry->d_name);
502 debugs(47, 3, HERE << "Opening " << fullfilename);
503 fd = file_open(fullfilename, O_RDONLY | O_BINARY);
504
505 if (fd < 0)
506 debugs(47, DBG_IMPORTANT, HERE << "error opening " << fullfilename << ": " << xstrerror());
507 else
508 ++store_open_disk_fd;
509
510 continue;
511 }
512
513 if (td != NULL)
514 closedir(td);
515
516 td = NULL;
517
518 in_dir = 0;
519
520 if (sd->validL2(++curlvl2))
521 continue;
522
523 curlvl2 = 0;
524
525 if (sd->validL1(++curlvl1))
526 continue;
527
528 curlvl1 = 0;
529
530 done = 1;
531 }
532
533 *filn_p = fn;
534 return fd;
535 }
536
537 bool
538 Fs::Ufs::RebuildState::error() const
539 {
540 return false;
541 }
542
543 bool
544 Fs::Ufs::RebuildState::isDone() const
545 {
546 return _done;
547 }
548
549 StoreEntry *
550 Fs::Ufs::RebuildState::currentItem()
551 {
552 return currentEntry();
553 }