]> git.ipfire.org Git - thirdparty/squid.git/blame - src/disk.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / disk.cc
CommitLineData
30a4f2a8 1/*
bde978a6 2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
e25c139f 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
30a4f2a8 7 */
ed43818f 8
bbc27441
AJ
9/* DEBUG: section 06 Disk I/O Routines */
10
582c2af2 11#include "squid.h"
d841c88d 12#include "comm/Loops.h"
65914896 13#include "disk.h"
c4ad1349 14#include "fd.h"
528b2c61 15#include "fde.h"
67679543 16#include "globals.h"
0eb49b6d 17#include "MemBuf.h"
582c2af2 18#include "profiler/Profiler.h"
e4f1fdae 19#include "StatCounters.h"
090089c4 20
1a30fdf5 21#include <cerrno>
21d845b1 22
95d15928 23static PF diskHandleRead;
24static PF diskHandleWrite;
24382924 25
be266cb2 26#if _SQUID_WINDOWS_ || _SQUID_OS2_
1a94f598 27static int
28diskWriteIsComplete(int fd)
29{
30 return fd_table[fd].disk.write_q ? 0 : 1;
31}
62e76326 32
1a94f598 33#endif
34
/* Initialisation hook for the disk I/O module; it has nothing to set up. */
void
disk_init(void)
{
    /* intentionally empty */
}
40
59a09b98
FC
41/* hack needed on SunStudio to avoid linkage convention mismatch */
42static void cxx_xfree(void *ptr)
43{
f673997d 44 xfree(ptr);
59a09b98
FC
45}
46
1a94f598 47/*
48 * opens a disk file specified by 'path'. This function always
49 * blocks! There is no callback.
50 */
684c2720 51int
1a94f598 52file_open(const char *path, int mode)
090089c4 53{
090089c4 54 int fd;
88bfe092 55 PROF_start(file_open);
62e76326 56
5d1a7121 57 if (FILE_MODE(mode) == O_WRONLY)
62e76326 58 mode |= O_APPEND;
59
b870e0b4 60 errno = 0;
62e76326 61
0a0bf5db 62 fd = open(path, mode, 0644);
62e76326 63
95dc7ff4 64 ++ statCounter.syscalls.disk.opens;
62e76326 65
0a0bf5db 66 if (fd < 0) {
bf8fe701 67 debugs(50, 3, "file_open: error opening file " << path << ": " << xstrerror());
62e76326 68 fd = DISK_ERROR;
2391a162 69 } else {
bf8fe701 70 debugs(6, 5, "file_open: FD " << fd);
62e76326 71 commSetCloseOnExec(fd);
72 fd_open(fd, FD_FILE, path);
090089c4 73 }
62e76326 74
88bfe092 75 PROF_stop(file_open);
2391a162 76 return fd;
0a0bf5db 77}
78
090089c4 79/* close a disk file. */
95d15928 80void
b8d8561b 81file_close(int fd)
090089c4 82{
76f87348 83 fde *F = &fd_table[fd];
2391a162 84 PF *read_callback;
88bfe092 85 PROF_start(file_close);
25354045 86 assert(fd >= 0);
60c0b5a2 87 assert(F->flags.open);
62e76326 88
2391a162 89 if ((read_callback = F->read_handler)) {
62e76326 90 F->read_handler = NULL;
91 read_callback(-1, F->read_data);
65d548bf 92 }
62e76326 93
0cd30ba5 94 if (F->flags.write_daemon) {
be266cb2 95#if _SQUID_WINDOWS_ || _SQUID_OS2_
62e76326 96 /*
97 * on some operating systems, you can not delete or rename
98 * open files, so we won't allow delayed close.
99 */
62e76326 100 while (!diskWriteIsComplete(fd))
101 diskHandleWrite(fd, NULL);
cd377065 102#else
be4d35dc 103 F->flags.close_request = true;
bf8fe701 104 debugs(6, 2, "file_close: FD " << fd << ", delaying close");
62e76326 105 PROF_stop(file_close);
62e76326 106 return;
cd377065 107#endif
62e76326 108
fb247d78 109 }
62e76326 110
65d548bf 111 /*
112 * Assert there is no write callback. Otherwise we might be
113 * leaking write state data by closing the descriptor
114 */
115 assert(F->write_handler == NULL);
62e76326 116
42f99d0d 117#if CALL_FSYNC_BEFORE_CLOSE
62e76326 118
42f99d0d 119 fsync(fd);
62e76326 120
42f99d0d 121#endif
62e76326 122
95d15928 123 close(fd);
62e76326 124
bf8fe701 125 debugs(6, F->flags.close_request ? 2 : 5, "file_close: FD " << fd << " really closing\n");
62e76326 126
6cf028ab 127 fd_close(fd);
62e76326 128
95dc7ff4 129 ++ statCounter.syscalls.disk.closes;
62e76326 130
88bfe092 131 PROF_stop(file_close);
090089c4 132}
133
f02b8498 134/*
135 * This function has the purpose of combining multiple writes. This is
136 * to facilitate the ASYNC_IO option since it can only guarantee 1
137 * write to a file per trip around the comm.c select() loop. That's bad
138 * because more than 1 write can be made to the access.log file per
139 * trip, and so this code is purely designed to help batch multiple
140 * sequential writes to the access.log file. Squid will never issue
141 * multiple writes for any other file type during 1 trip around the
142 * select() loop. --SLF
143 */
582b6456 144static void
5fed1735 145diskCombineWrites(_fde_disk *fdd)
090089c4 146{
f02b8498 147 /*
148 * We need to combine multiple write requests on an FD's write
149 * queue But only if we don't need to seek() in between them, ugh!
150 * XXX This currently ignores any seeks (file_offset)
151 */
62e76326 152
26ac0430 153 if (fdd->write_q != NULL && fdd->write_q->next != NULL) {
b115733c 154 int len = 0;
62e76326 155
b115733c 156 for (dwrite_q *q = fdd->write_q; q != NULL; q = q->next)
62e76326 157 len += q->len - q->buf_offset;
158
b115733c 159 dwrite_q *wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
62e76326 160
161 wq->buf = (char *)xmalloc(len);
162
163 wq->len = 0;
164
165 wq->buf_offset = 0;
166
167 wq->next = NULL;
168
59a09b98 169 wq->free_func = cxx_xfree;
62e76326 170
b115733c
AJ
171 while (fdd->write_q != NULL) {
172 dwrite_q *q = fdd->write_q;
173
62e76326 174 len = q->len - q->buf_offset;
41d00cd3 175 memcpy(wq->buf + wq->len, q->buf + q->buf_offset, len);
62e76326 176 wq->len += len;
177 fdd->write_q = q->next;
178
179 if (q->free_func)
6ca34f6f 180 q->free_func(q->buf);
62e76326 181
b115733c
AJ
182 memFree(q, MEM_DWRITE_Q);
183 };
62e76326 184
185 fdd->write_q_tail = wq;
186
187 fdd->write_q = wq;
0a0bf5db 188 }
f02b8498 189}
190
191/* write handler */
192static void
ced8def3 193diskHandleWrite(int fd, void *)
f02b8498 194{
195 int len = 0;
f02b8498 196 fde *F = &fd_table[fd];
62e76326 197
5fed1735 198 _fde_disk *fdd = &F->disk;
2391a162 199 dwrite_q *q = fdd->write_q;
200 int status = DISK_OK;
be4d35dc 201 bool do_close;
62e76326 202
2391a162 203 if (NULL == q)
62e76326 204 return;
205
88bfe092 206 PROF_start(diskHandleWrite);
62e76326 207
bf8fe701 208 debugs(6, 3, "diskHandleWrite: FD " << fd);
62e76326 209
be4d35dc 210 F->flags.write_daemon = false;
62e76326 211
8350fe9b 212 assert(fdd->write_q != NULL);
62e76326 213
d377699f 214 assert(fdd->write_q->len > fdd->write_q->buf_offset);
62e76326 215
e2851fe7
AR
216 debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " <<
217 (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes at " <<
218 fdd->write_q->file_offset);
62e76326 219
b870e0b4 220 errno = 0;
62e76326 221
cd748f27 222 if (fdd->write_q->file_offset != -1)
e2851fe7 223 lseek(fd, fdd->write_q->file_offset, SEEK_SET); /* XXX ignore return? */
62e76326 224
1f7c9178 225 len = FD_WRITE_METHOD(fd,
62e76326 226 fdd->write_q->buf + fdd->write_q->buf_offset,
227 fdd->write_q->len - fdd->write_q->buf_offset);
228
bf8fe701 229 debugs(6, 3, "diskHandleWrite: FD " << fd << " len = " << len);
62e76326 230
95dc7ff4 231 ++ statCounter.syscalls.disk.writes;
62e76326 232
6cf028ab 233 fd_bytes(fd, len, FD_WRITE);
62e76326 234
0a0bf5db 235 if (len < 0) {
62e76326 236 if (!ignoreErrno(errno)) {
237 status = errno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR;
e0236918 238 debugs(50, DBG_IMPORTANT, "diskHandleWrite: FD " << fd << ": disk write error: " << xstrerror());
bf8fe701 239
62e76326 240 /*
241 * If there is no write callback, then this file is
242 * most likely something important like a log file, or
243 * an interprocess pipe. Its not a swapfile. We feel
244 * that a write failure on a log file is rather important,
245 * and Squid doesn't otherwise deal with this condition.
246 * So to get the administrators attention, we exit with
247 * a fatal message.
248 */
249
250 if (fdd->wrt_handle == NULL)
251 fatal("Write failure -- check your disk space and cache.log");
252
253 /*
254 * If there is a write failure, then we notify the
255 * upper layer via the callback, at the end of this
256 * function. Meanwhile, flush all pending buffers
257 * here. Let the upper layer decide how to handle the
258 * failure. This will prevent experiencing multiple,
259 * repeated write failures for the same FD because of
260 * the queued data.
261 */
262 do {
263 fdd->write_q = q->next;
264
265 if (q->free_func)
6ca34f6f 266 q->free_func(q->buf);
62e76326 267
268 if (q) {
269 memFree(q, MEM_DWRITE_Q);
270 q = NULL;
271 }
272 } while ((q = fdd->write_q));
273 }
274
275 len = 0;
0a0bf5db 276 }
62e76326 277
8350fe9b 278 if (q != NULL) {
62e76326 279 /* q might become NULL from write failure above */
280 q->buf_offset += len;
281
282 if (q->buf_offset > q->len)
e0236918 283 debugs(50, DBG_IMPORTANT, "diskHandleWriteComplete: q->buf_offset > q->len (" <<
bf8fe701 284 q << "," << (int) q->buf_offset << ", " << q->len << ", " <<
285 len << " FD " << fd << ")");
286
62e76326 287 assert(q->buf_offset <= q->len);
288
289 if (q->buf_offset == q->len) {
290 /* complete write */
291 fdd->write_q = q->next;
292
293 if (q->free_func)
6ca34f6f 294 q->free_func(q->buf);
62e76326 295
296 if (q) {
297 memFree(q, MEM_DWRITE_Q);
298 q = NULL;
299 }
300 }
090089c4 301 }
62e76326 302
de866d20 303 if (fdd->write_q == NULL) {
62e76326 304 /* no more data */
305 fdd->write_q_tail = NULL;
de866d20 306 } else {
62e76326 307 /* another block is queued */
308 diskCombineWrites(fdd);
d841c88d 309 Comm::SetSelect(fd, COMM_SELECT_WRITE, diskHandleWrite, NULL, 0);
be4d35dc 310 F->flags.write_daemon = true;
4a86108c 311 }
62e76326 312
0cd30ba5 313 do_close = F->flags.close_request;
62e76326 314
25354045 315 if (fdd->wrt_handle) {
62e76326 316 DWCB *callback = fdd->wrt_handle;
317 void *cbdata;
318 fdd->wrt_handle = NULL;
319
320 if (cbdataReferenceValidDone(fdd->wrt_handle_data, &cbdata)) {
321 callback(fd, status, len, cbdata);
322 /*
323 * NOTE, this callback can close the FD, so we must
324 * not touch 'F', 'fdd', etc. after this.
325 */
326 PROF_stop(diskHandleWrite);
327 return;
328 /* XXX But what about close_request??? */
329 }
25354045 330 }
62e76326 331
68c21f71 332 if (do_close)
62e76326 333 file_close(fd);
334
88bfe092 335 PROF_stop(diskHandleWrite);
090089c4 336}
337
090089c4 338/* write block to a file */
339/* write back queue. Only one writer at a time. */
340/* call a handle when writing is complete. */
e3ef2b09 341void
3ebcfaa1 342file_write(int fd,
62e76326 343 off_t file_offset,
344 void const *ptr_to_buf,
345 int len,
346 DWCB * handle,
347 void *handle_data,
348 FREE * free_func)
090089c4 349{
c6ac7aae 350 dwrite_q *wq = NULL;
48cc3fcf 351 fde *F = &fd_table[fd];
88bfe092 352 PROF_start(file_write);
48cc3fcf 353 assert(fd >= 0);
60c0b5a2 354 assert(F->flags.open);
090089c4 355 /* if we got here. Caller is eligible to write. */
e6ccf245 356 wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
d377699f 357 wq->file_offset = file_offset;
e6ccf245 358 wq->buf = (char *)ptr_to_buf;
090089c4 359 wq->len = len;
d377699f 360 wq->buf_offset = 0;
090089c4 361 wq->next = NULL;
ed7f0b6a 362 wq->free_func = free_func;
62e76326 363
fa80a8ef 364 if (!F->disk.wrt_handle_data) {
62e76326 365 F->disk.wrt_handle = handle;
366 F->disk.wrt_handle_data = cbdataReference(handle_data);
fa80a8ef 367 } else {
62e76326 368 /* Detect if there is multiple concurrent users of this fd.. we only support one callback */
369 assert(F->disk.wrt_handle_data == handle_data && F->disk.wrt_handle == handle);
fa80a8ef 370 }
62e76326 371
090089c4 372 /* add to queue */
48cc3fcf 373 if (F->disk.write_q == NULL) {
62e76326 374 /* empty queue */
375 F->disk.write_q = F->disk.write_q_tail = wq;
090089c4 376 } else {
62e76326 377 F->disk.write_q_tail->next = wq;
378 F->disk.write_q_tail = wq;
090089c4 379 }
62e76326 380
0cd30ba5 381 if (!F->flags.write_daemon) {
62e76326 382 diskHandleWrite(fd, NULL);
429fdbec 383 }
62e76326 384
88bfe092 385 PROF_stop(file_write);
090089c4 386}
387
23b2b404 388/*
389 * a wrapper around file_write to allow for MemBuf to be file_written
390 * in a snap
391 */
137ee196 392void
393file_write_mbuf(int fd, off_t off, MemBuf mb, DWCB * handler, void *handler_data)
394{
2fe7eff9 395 file_write(fd, off, mb.buf, mb.size, handler, handler_data, mb.freeFunc());
137ee196 396}
090089c4 397
398/* Read from FD */
582b6456 399static void
400diskHandleRead(int fd, void *data)
090089c4 401{
e6ccf245 402 dread_ctrl *ctrl_dat = (dread_ctrl *)data;
edd2eb63 403 fde *F = &fd_table[fd];
090089c4 404 int len;
2391a162 405 int rc = DISK_OK;
65d548bf 406 /*
407 * FD < 0 indicates premature close; we just have to free
408 * the state data.
409 */
62e76326 410
65d548bf 411 if (fd < 0) {
62e76326 412 memFree(ctrl_dat, MEM_DREAD_CTRL);
413 return;
65d548bf 414 }
62e76326 415
88bfe092 416 PROF_start(diskHandleRead);
62e76326 417
034b5ea4 418#if WRITES_MAINTAIN_DISK_OFFSET
711982d8 419 if (F->disk.offset != ctrl_dat->offset) {
034b5ea4
AR
420#else
421 {
422#endif
4a7a3d56 423 debugs(6, 3, "diskHandleRead: FD " << fd << " seeking to offset " << ctrl_dat->offset);
f53969cc 424 lseek(fd, ctrl_dat->offset, SEEK_SET); /* XXX ignore return? */
95dc7ff4 425 ++ statCounter.syscalls.disk.seeks;
62e76326 426 F->disk.offset = ctrl_dat->offset;
711982d8 427 }
62e76326 428
b870e0b4 429 errno = 0;
1f7c9178 430 len = FD_READ_METHOD(fd, ctrl_dat->buf, ctrl_dat->req_len);
62e76326 431
015b507a 432 if (len > 0)
62e76326 433 F->disk.offset += len;
434
95dc7ff4 435 ++ statCounter.syscalls.disk.reads;
62e76326 436
4f92c80c 437 fd_bytes(fd, len, FD_READ);
62e76326 438
0a0bf5db 439 if (len < 0) {
62e76326 440 if (ignoreErrno(errno)) {
d841c88d 441 Comm::SetSelect(fd, COMM_SELECT_READ, diskHandleRead, ctrl_dat, 0);
62e76326 442 PROF_stop(diskHandleRead);
443 return;
444 }
445
e0236918 446 debugs(50, DBG_IMPORTANT, "diskHandleRead: FD " << fd << ": " << xstrerror());
62e76326 447 len = 0;
448 rc = DISK_ERROR;
090089c4 449 } else if (len == 0) {
62e76326 450 rc = DISK_EOF;
090089c4 451 }
62e76326 452
fa80a8ef 453 if (cbdataReferenceValid(ctrl_dat->client_data))
62e76326 454 ctrl_dat->handler(fd, ctrl_dat->buf, len, rc, ctrl_dat->client_data);
455
fa80a8ef 456 cbdataReferenceDone(ctrl_dat->client_data);
62e76326 457
db1cd23c 458 memFree(ctrl_dat, MEM_DREAD_CTRL);
62e76326 459
88bfe092 460 PROF_stop(diskHandleRead);
090089c4 461}
462
090089c4 463/* start read operation */
62e76326 464/* buffer must be allocated from the caller.
26ac0430 465 * It must have at least req_len space in there.
090089c4 466 * call handler when a reading is complete. */
2391a162 467void
d377699f 468file_read(int fd, char *buf, int req_len, off_t offset, DRCB * handler, void *client_data)
090089c4 469{
470 dread_ctrl *ctrl_dat;
88bfe092 471 PROF_start(file_read);
711982d8 472 assert(fd >= 0);
e6ccf245 473 ctrl_dat = (dread_ctrl *)memAllocate(MEM_DREAD_CTRL);
090089c4 474 ctrl_dat->fd = fd;
475 ctrl_dat->offset = offset;
476 ctrl_dat->req_len = req_len;
477 ctrl_dat->buf = buf;
090089c4 478 ctrl_dat->end_of_file = 0;
479 ctrl_dat->handler = handler;
fa80a8ef 480 ctrl_dat->client_data = cbdataReference(client_data);
0a0bf5db 481 diskHandleRead(fd, ctrl_dat);
88bfe092 482 PROF_stop(file_read);
090089c4 483}
c8f4eac4 484
485void
486safeunlink(const char *s, int quiet)
487{
95dc7ff4 488 ++ statCounter.syscalls.disk.unlinks;
c8f4eac4 489
490 if (unlink(s) < 0 && !quiet)
e0236918 491 debugs(50, DBG_IMPORTANT, "safeunlink: Couldn't delete " << s << ": " << xstrerror());
c8f4eac4 492}
493
494/*
495 * Same as rename(2) but complains if something goes wrong;
26ac0430 496 * the caller is responsible for handing and explaining the
c8f4eac4 497 * consequences of errors.
498 */
499int
500xrename(const char *from, const char *to)
501{
bf8fe701 502 debugs(21, 2, "xrename: renaming " << from << " to " << to);
be266cb2 503#if _SQUID_OS2_ || _SQUID_WINDOWS_
6ca34f6f 504 remove(to);
c8f4eac4 505#endif
506
507 if (0 == rename(from, to))
508 return 0;
509
bf8fe701 510 debugs(21, errno == ENOENT ? 2 : 1, "xrename: Cannot rename " << from << " to " << to << ": " << xstrerror());
c8f4eac4 511
512 return -1;
513}
514