]> git.ipfire.org Git - thirdparty/squid.git/blame - src/disk.cc
Boilerplate: update copyright blurbs on src/
[thirdparty/squid.git] / src / disk.cc
CommitLineData
30a4f2a8 1/*
bbc27441 2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
e25c139f 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
30a4f2a8 7 */
ed43818f 8
bbc27441
AJ
9/* DEBUG: section 06 Disk I/O Routines */
10
582c2af2 11#include "squid.h"
d841c88d 12#include "comm/Loops.h"
65914896 13#include "disk.h"
c4ad1349 14#include "fd.h"
528b2c61 15#include "fde.h"
67679543 16#include "globals.h"
8a89c28f 17#include "Mem.h"
0eb49b6d 18#include "MemBuf.h"
582c2af2 19#include "profiler/Profiler.h"
e4f1fdae 20#include "StatCounters.h"
090089c4 21
1a30fdf5 22#include <cerrno>
21d845b1 23
95d15928 24static PF diskHandleRead;
25static PF diskHandleWrite;
24382924 26
be266cb2 27#if _SQUID_WINDOWS_ || _SQUID_OS2_
1a94f598 28static int
29diskWriteIsComplete(int fd)
30{
31 return fd_table[fd].disk.write_q ? 0 : 1;
32}
62e76326 33
1a94f598 34#endif
35
/* Initialisation hook for the disk I/O routines; there is nothing to set up. */
void
disk_init(void)
{
    /* intentionally empty */
}
41
59a09b98
FC
42/* hack needed on SunStudio to avoid linkage convention mismatch */
43static void cxx_xfree(void *ptr)
44{
f673997d 45 xfree(ptr);
59a09b98
FC
46}
47
1a94f598 48/*
49 * opens a disk file specified by 'path'. This function always
50 * blocks! There is no callback.
51 */
684c2720 52int
1a94f598 53file_open(const char *path, int mode)
090089c4 54{
090089c4 55 int fd;
88bfe092 56 PROF_start(file_open);
62e76326 57
5d1a7121 58 if (FILE_MODE(mode) == O_WRONLY)
62e76326 59 mode |= O_APPEND;
60
b870e0b4 61 errno = 0;
62e76326 62
0a0bf5db 63 fd = open(path, mode, 0644);
62e76326 64
95dc7ff4 65 ++ statCounter.syscalls.disk.opens;
62e76326 66
0a0bf5db 67 if (fd < 0) {
bf8fe701 68 debugs(50, 3, "file_open: error opening file " << path << ": " << xstrerror());
62e76326 69 fd = DISK_ERROR;
2391a162 70 } else {
bf8fe701 71 debugs(6, 5, "file_open: FD " << fd);
62e76326 72 commSetCloseOnExec(fd);
73 fd_open(fd, FD_FILE, path);
090089c4 74 }
62e76326 75
88bfe092 76 PROF_stop(file_open);
2391a162 77 return fd;
0a0bf5db 78}
79
090089c4 80/* close a disk file. */
95d15928 81void
b8d8561b 82file_close(int fd)
090089c4 83{
76f87348 84 fde *F = &fd_table[fd];
2391a162 85 PF *read_callback;
88bfe092 86 PROF_start(file_close);
25354045 87 assert(fd >= 0);
60c0b5a2 88 assert(F->flags.open);
62e76326 89
2391a162 90 if ((read_callback = F->read_handler)) {
62e76326 91 F->read_handler = NULL;
92 read_callback(-1, F->read_data);
65d548bf 93 }
62e76326 94
0cd30ba5 95 if (F->flags.write_daemon) {
be266cb2 96#if _SQUID_WINDOWS_ || _SQUID_OS2_
62e76326 97 /*
98 * on some operating systems, you can not delete or rename
99 * open files, so we won't allow delayed close.
100 */
62e76326 101 while (!diskWriteIsComplete(fd))
102 diskHandleWrite(fd, NULL);
cd377065 103#else
be4d35dc 104 F->flags.close_request = true;
bf8fe701 105 debugs(6, 2, "file_close: FD " << fd << ", delaying close");
62e76326 106 PROF_stop(file_close);
62e76326 107 return;
cd377065 108#endif
62e76326 109
fb247d78 110 }
62e76326 111
65d548bf 112 /*
113 * Assert there is no write callback. Otherwise we might be
114 * leaking write state data by closing the descriptor
115 */
116 assert(F->write_handler == NULL);
62e76326 117
42f99d0d 118#if CALL_FSYNC_BEFORE_CLOSE
62e76326 119
42f99d0d 120 fsync(fd);
62e76326 121
42f99d0d 122#endif
62e76326 123
95d15928 124 close(fd);
62e76326 125
bf8fe701 126 debugs(6, F->flags.close_request ? 2 : 5, "file_close: FD " << fd << " really closing\n");
62e76326 127
6cf028ab 128 fd_close(fd);
62e76326 129
95dc7ff4 130 ++ statCounter.syscalls.disk.closes;
62e76326 131
88bfe092 132 PROF_stop(file_close);
090089c4 133}
134
f02b8498 135/*
136 * This function has the purpose of combining multiple writes. This is
137 * to facilitate the ASYNC_IO option since it can only guarantee 1
138 * write to a file per trip around the comm.c select() loop. That's bad
139 * because more than 1 write can be made to the access.log file per
140 * trip, and so this code is purely designed to help batch multiple
141 * sequential writes to the access.log file. Squid will never issue
142 * multiple writes for any other file type during 1 trip around the
143 * select() loop. --SLF
144 */
582b6456 145static void
5fed1735 146diskCombineWrites(_fde_disk *fdd)
090089c4 147{
f02b8498 148 /*
149 * We need to combine multiple write requests on an FD's write
150 * queue But only if we don't need to seek() in between them, ugh!
151 * XXX This currently ignores any seeks (file_offset)
152 */
62e76326 153
26ac0430 154 if (fdd->write_q != NULL && fdd->write_q->next != NULL) {
b115733c 155 int len = 0;
62e76326 156
b115733c 157 for (dwrite_q *q = fdd->write_q; q != NULL; q = q->next)
62e76326 158 len += q->len - q->buf_offset;
159
b115733c 160 dwrite_q *wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
62e76326 161
162 wq->buf = (char *)xmalloc(len);
163
164 wq->len = 0;
165
166 wq->buf_offset = 0;
167
168 wq->next = NULL;
169
59a09b98 170 wq->free_func = cxx_xfree;
62e76326 171
b115733c
AJ
172 while (fdd->write_q != NULL) {
173 dwrite_q *q = fdd->write_q;
174
62e76326 175 len = q->len - q->buf_offset;
41d00cd3 176 memcpy(wq->buf + wq->len, q->buf + q->buf_offset, len);
62e76326 177 wq->len += len;
178 fdd->write_q = q->next;
179
180 if (q->free_func)
6ca34f6f 181 q->free_func(q->buf);
62e76326 182
b115733c
AJ
183 memFree(q, MEM_DWRITE_Q);
184 };
62e76326 185
186 fdd->write_q_tail = wq;
187
188 fdd->write_q = wq;
0a0bf5db 189 }
f02b8498 190}
191
192/* write handler */
193static void
194diskHandleWrite(int fd, void *notused)
195{
196 int len = 0;
f02b8498 197 fde *F = &fd_table[fd];
62e76326 198
5fed1735 199 _fde_disk *fdd = &F->disk;
2391a162 200 dwrite_q *q = fdd->write_q;
201 int status = DISK_OK;
be4d35dc 202 bool do_close;
62e76326 203
2391a162 204 if (NULL == q)
62e76326 205 return;
206
88bfe092 207 PROF_start(diskHandleWrite);
62e76326 208
bf8fe701 209 debugs(6, 3, "diskHandleWrite: FD " << fd);
62e76326 210
be4d35dc 211 F->flags.write_daemon = false;
62e76326 212
8350fe9b 213 assert(fdd->write_q != NULL);
62e76326 214
d377699f 215 assert(fdd->write_q->len > fdd->write_q->buf_offset);
62e76326 216
e2851fe7
AR
217 debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " <<
218 (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes at " <<
219 fdd->write_q->file_offset);
62e76326 220
b870e0b4 221 errno = 0;
62e76326 222
cd748f27 223 if (fdd->write_q->file_offset != -1)
e2851fe7 224 lseek(fd, fdd->write_q->file_offset, SEEK_SET); /* XXX ignore return? */
62e76326 225
1f7c9178 226 len = FD_WRITE_METHOD(fd,
62e76326 227 fdd->write_q->buf + fdd->write_q->buf_offset,
228 fdd->write_q->len - fdd->write_q->buf_offset);
229
bf8fe701 230 debugs(6, 3, "diskHandleWrite: FD " << fd << " len = " << len);
62e76326 231
95dc7ff4 232 ++ statCounter.syscalls.disk.writes;
62e76326 233
6cf028ab 234 fd_bytes(fd, len, FD_WRITE);
62e76326 235
0a0bf5db 236 if (len < 0) {
62e76326 237 if (!ignoreErrno(errno)) {
238 status = errno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR;
e0236918 239 debugs(50, DBG_IMPORTANT, "diskHandleWrite: FD " << fd << ": disk write error: " << xstrerror());
bf8fe701 240
62e76326 241 /*
242 * If there is no write callback, then this file is
243 * most likely something important like a log file, or
244 * an interprocess pipe. Its not a swapfile. We feel
245 * that a write failure on a log file is rather important,
246 * and Squid doesn't otherwise deal with this condition.
247 * So to get the administrators attention, we exit with
248 * a fatal message.
249 */
250
251 if (fdd->wrt_handle == NULL)
252 fatal("Write failure -- check your disk space and cache.log");
253
254 /*
255 * If there is a write failure, then we notify the
256 * upper layer via the callback, at the end of this
257 * function. Meanwhile, flush all pending buffers
258 * here. Let the upper layer decide how to handle the
259 * failure. This will prevent experiencing multiple,
260 * repeated write failures for the same FD because of
261 * the queued data.
262 */
263 do {
264 fdd->write_q = q->next;
265
266 if (q->free_func)
6ca34f6f 267 q->free_func(q->buf);
62e76326 268
269 if (q) {
270 memFree(q, MEM_DWRITE_Q);
271 q = NULL;
272 }
273 } while ((q = fdd->write_q));
274 }
275
276 len = 0;
0a0bf5db 277 }
62e76326 278
8350fe9b 279 if (q != NULL) {
62e76326 280 /* q might become NULL from write failure above */
281 q->buf_offset += len;
282
283 if (q->buf_offset > q->len)
e0236918 284 debugs(50, DBG_IMPORTANT, "diskHandleWriteComplete: q->buf_offset > q->len (" <<
bf8fe701 285 q << "," << (int) q->buf_offset << ", " << q->len << ", " <<
286 len << " FD " << fd << ")");
287
62e76326 288 assert(q->buf_offset <= q->len);
289
290 if (q->buf_offset == q->len) {
291 /* complete write */
292 fdd->write_q = q->next;
293
294 if (q->free_func)
6ca34f6f 295 q->free_func(q->buf);
62e76326 296
297 if (q) {
298 memFree(q, MEM_DWRITE_Q);
299 q = NULL;
300 }
301 }
090089c4 302 }
62e76326 303
de866d20 304 if (fdd->write_q == NULL) {
62e76326 305 /* no more data */
306 fdd->write_q_tail = NULL;
de866d20 307 } else {
62e76326 308 /* another block is queued */
309 diskCombineWrites(fdd);
d841c88d 310 Comm::SetSelect(fd, COMM_SELECT_WRITE, diskHandleWrite, NULL, 0);
be4d35dc 311 F->flags.write_daemon = true;
4a86108c 312 }
62e76326 313
0cd30ba5 314 do_close = F->flags.close_request;
62e76326 315
25354045 316 if (fdd->wrt_handle) {
62e76326 317 DWCB *callback = fdd->wrt_handle;
318 void *cbdata;
319 fdd->wrt_handle = NULL;
320
321 if (cbdataReferenceValidDone(fdd->wrt_handle_data, &cbdata)) {
322 callback(fd, status, len, cbdata);
323 /*
324 * NOTE, this callback can close the FD, so we must
325 * not touch 'F', 'fdd', etc. after this.
326 */
327 PROF_stop(diskHandleWrite);
328 return;
329 /* XXX But what about close_request??? */
330 }
25354045 331 }
62e76326 332
68c21f71 333 if (do_close)
62e76326 334 file_close(fd);
335
88bfe092 336 PROF_stop(diskHandleWrite);
090089c4 337}
338
090089c4 339/* write block to a file */
340/* write back queue. Only one writer at a time. */
341/* call a handle when writing is complete. */
e3ef2b09 342void
3ebcfaa1 343file_write(int fd,
62e76326 344 off_t file_offset,
345 void const *ptr_to_buf,
346 int len,
347 DWCB * handle,
348 void *handle_data,
349 FREE * free_func)
090089c4 350{
c6ac7aae 351 dwrite_q *wq = NULL;
48cc3fcf 352 fde *F = &fd_table[fd];
88bfe092 353 PROF_start(file_write);
48cc3fcf 354 assert(fd >= 0);
60c0b5a2 355 assert(F->flags.open);
090089c4 356 /* if we got here. Caller is eligible to write. */
e6ccf245 357 wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
d377699f 358 wq->file_offset = file_offset;
e6ccf245 359 wq->buf = (char *)ptr_to_buf;
090089c4 360 wq->len = len;
d377699f 361 wq->buf_offset = 0;
090089c4 362 wq->next = NULL;
ed7f0b6a 363 wq->free_func = free_func;
62e76326 364
fa80a8ef 365 if (!F->disk.wrt_handle_data) {
62e76326 366 F->disk.wrt_handle = handle;
367 F->disk.wrt_handle_data = cbdataReference(handle_data);
fa80a8ef 368 } else {
62e76326 369 /* Detect if there is multiple concurrent users of this fd.. we only support one callback */
370 assert(F->disk.wrt_handle_data == handle_data && F->disk.wrt_handle == handle);
fa80a8ef 371 }
62e76326 372
090089c4 373 /* add to queue */
48cc3fcf 374 if (F->disk.write_q == NULL) {
62e76326 375 /* empty queue */
376 F->disk.write_q = F->disk.write_q_tail = wq;
090089c4 377 } else {
62e76326 378 F->disk.write_q_tail->next = wq;
379 F->disk.write_q_tail = wq;
090089c4 380 }
62e76326 381
0cd30ba5 382 if (!F->flags.write_daemon) {
62e76326 383 diskHandleWrite(fd, NULL);
429fdbec 384 }
62e76326 385
88bfe092 386 PROF_stop(file_write);
090089c4 387}
388
23b2b404 389/*
390 * a wrapper around file_write to allow for MemBuf to be file_written
391 * in a snap
392 */
137ee196 393void
394file_write_mbuf(int fd, off_t off, MemBuf mb, DWCB * handler, void *handler_data)
395{
2fe7eff9 396 file_write(fd, off, mb.buf, mb.size, handler, handler_data, mb.freeFunc());
137ee196 397}
090089c4 398
399/* Read from FD */
582b6456 400static void
401diskHandleRead(int fd, void *data)
090089c4 402{
e6ccf245 403 dread_ctrl *ctrl_dat = (dread_ctrl *)data;
edd2eb63 404 fde *F = &fd_table[fd];
090089c4 405 int len;
2391a162 406 int rc = DISK_OK;
65d548bf 407 /*
408 * FD < 0 indicates premature close; we just have to free
409 * the state data.
410 */
62e76326 411
65d548bf 412 if (fd < 0) {
62e76326 413 memFree(ctrl_dat, MEM_DREAD_CTRL);
414 return;
65d548bf 415 }
62e76326 416
88bfe092 417 PROF_start(diskHandleRead);
62e76326 418
034b5ea4 419#if WRITES_MAINTAIN_DISK_OFFSET
711982d8 420 if (F->disk.offset != ctrl_dat->offset) {
034b5ea4
AR
421#else
422 {
423#endif
4a7a3d56 424 debugs(6, 3, "diskHandleRead: FD " << fd << " seeking to offset " << ctrl_dat->offset);
62e76326 425 lseek(fd, ctrl_dat->offset, SEEK_SET); /* XXX ignore return? */
95dc7ff4 426 ++ statCounter.syscalls.disk.seeks;
62e76326 427 F->disk.offset = ctrl_dat->offset;
711982d8 428 }
62e76326 429
b870e0b4 430 errno = 0;
1f7c9178 431 len = FD_READ_METHOD(fd, ctrl_dat->buf, ctrl_dat->req_len);
62e76326 432
015b507a 433 if (len > 0)
62e76326 434 F->disk.offset += len;
435
95dc7ff4 436 ++ statCounter.syscalls.disk.reads;
62e76326 437
4f92c80c 438 fd_bytes(fd, len, FD_READ);
62e76326 439
0a0bf5db 440 if (len < 0) {
62e76326 441 if (ignoreErrno(errno)) {
d841c88d 442 Comm::SetSelect(fd, COMM_SELECT_READ, diskHandleRead, ctrl_dat, 0);
62e76326 443 PROF_stop(diskHandleRead);
444 return;
445 }
446
e0236918 447 debugs(50, DBG_IMPORTANT, "diskHandleRead: FD " << fd << ": " << xstrerror());
62e76326 448 len = 0;
449 rc = DISK_ERROR;
090089c4 450 } else if (len == 0) {
62e76326 451 rc = DISK_EOF;
090089c4 452 }
62e76326 453
fa80a8ef 454 if (cbdataReferenceValid(ctrl_dat->client_data))
62e76326 455 ctrl_dat->handler(fd, ctrl_dat->buf, len, rc, ctrl_dat->client_data);
456
fa80a8ef 457 cbdataReferenceDone(ctrl_dat->client_data);
62e76326 458
db1cd23c 459 memFree(ctrl_dat, MEM_DREAD_CTRL);
62e76326 460
88bfe092 461 PROF_stop(diskHandleRead);
090089c4 462}
463
090089c4 464/* start read operation */
62e76326 465/* buffer must be allocated from the caller.
26ac0430 466 * It must have at least req_len space in there.
090089c4 467 * call handler when a reading is complete. */
2391a162 468void
d377699f 469file_read(int fd, char *buf, int req_len, off_t offset, DRCB * handler, void *client_data)
090089c4 470{
471 dread_ctrl *ctrl_dat;
88bfe092 472 PROF_start(file_read);
711982d8 473 assert(fd >= 0);
e6ccf245 474 ctrl_dat = (dread_ctrl *)memAllocate(MEM_DREAD_CTRL);
090089c4 475 ctrl_dat->fd = fd;
476 ctrl_dat->offset = offset;
477 ctrl_dat->req_len = req_len;
478 ctrl_dat->buf = buf;
090089c4 479 ctrl_dat->end_of_file = 0;
480 ctrl_dat->handler = handler;
fa80a8ef 481 ctrl_dat->client_data = cbdataReference(client_data);
0a0bf5db 482 diskHandleRead(fd, ctrl_dat);
88bfe092 483 PROF_stop(file_read);
090089c4 484}
c8f4eac4 485
486void
487safeunlink(const char *s, int quiet)
488{
95dc7ff4 489 ++ statCounter.syscalls.disk.unlinks;
c8f4eac4 490
491 if (unlink(s) < 0 && !quiet)
e0236918 492 debugs(50, DBG_IMPORTANT, "safeunlink: Couldn't delete " << s << ": " << xstrerror());
c8f4eac4 493}
494
495/*
496 * Same as rename(2) but complains if something goes wrong;
26ac0430 497 * the caller is responsible for handing and explaining the
c8f4eac4 498 * consequences of errors.
499 */
500int
501xrename(const char *from, const char *to)
502{
bf8fe701 503 debugs(21, 2, "xrename: renaming " << from << " to " << to);
be266cb2 504#if _SQUID_OS2_ || _SQUID_WINDOWS_
6ca34f6f 505 remove(to);
c8f4eac4 506#endif
507
508 if (0 == rename(from, to))
509 return 0;
510
bf8fe701 511 debugs(21, errno == ENOENT ? 2 : 1, "xrename: Cannot rename " << from << " to " << to << ": " << xstrerror());
c8f4eac4 512
513 return -1;
514}
515