]> git.ipfire.org Git - thirdparty/squid.git/blame - src/fs_io.cc
Store API and layout polishing. No functionality changes intended.
[thirdparty/squid.git] / src / fs_io.cc
CommitLineData
30a4f2a8 1/*
bde978a6 2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
e25c139f 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
30a4f2a8 7 */
ed43818f 8
bbc27441
AJ
9/* DEBUG: section 06 Disk I/O Routines */
10
582c2af2 11#include "squid.h"
d841c88d 12#include "comm/Loops.h"
c4ad1349 13#include "fd.h"
528b2c61 14#include "fde.h"
b3f7fd88 15#include "fs_io.h"
67679543 16#include "globals.h"
0eb49b6d 17#include "MemBuf.h"
582c2af2 18#include "profiler/Profiler.h"
e4f1fdae 19#include "StatCounters.h"
090089c4 20
1a30fdf5 21#include <cerrno>
21d845b1 22
95d15928 23static PF diskHandleRead;
24static PF diskHandleWrite;
24382924 25
be266cb2 26#if _SQUID_WINDOWS_ || _SQUID_OS2_
1a94f598 27static int
28diskWriteIsComplete(int fd)
29{
30 return fd_table[fd].disk.write_q ? 0 : 1;
31}
62e76326 32
1a94f598 33#endif
34
59a09b98
FC
35/* hack needed on SunStudio to avoid linkage convention mismatch */
36static void cxx_xfree(void *ptr)
37{
f673997d 38 xfree(ptr);
59a09b98
FC
39}
40
1a94f598 41/*
42 * opens a disk file specified by 'path'. This function always
43 * blocks! There is no callback.
44 */
684c2720 45int
1a94f598 46file_open(const char *path, int mode)
090089c4 47{
090089c4 48 int fd;
88bfe092 49 PROF_start(file_open);
62e76326 50
5d1a7121 51 if (FILE_MODE(mode) == O_WRONLY)
62e76326 52 mode |= O_APPEND;
53
b870e0b4 54 errno = 0;
62e76326 55
0a0bf5db 56 fd = open(path, mode, 0644);
62e76326 57
95dc7ff4 58 ++ statCounter.syscalls.disk.opens;
62e76326 59
0a0bf5db 60 if (fd < 0) {
bf8fe701 61 debugs(50, 3, "file_open: error opening file " << path << ": " << xstrerror());
62e76326 62 fd = DISK_ERROR;
2391a162 63 } else {
bf8fe701 64 debugs(6, 5, "file_open: FD " << fd);
62e76326 65 commSetCloseOnExec(fd);
66 fd_open(fd, FD_FILE, path);
090089c4 67 }
62e76326 68
88bfe092 69 PROF_stop(file_open);
2391a162 70 return fd;
0a0bf5db 71}
72
090089c4 73/* close a disk file. */
95d15928 74void
b8d8561b 75file_close(int fd)
090089c4 76{
76f87348 77 fde *F = &fd_table[fd];
2391a162 78 PF *read_callback;
88bfe092 79 PROF_start(file_close);
25354045 80 assert(fd >= 0);
60c0b5a2 81 assert(F->flags.open);
62e76326 82
2391a162 83 if ((read_callback = F->read_handler)) {
62e76326 84 F->read_handler = NULL;
85 read_callback(-1, F->read_data);
65d548bf 86 }
62e76326 87
0cd30ba5 88 if (F->flags.write_daemon) {
be266cb2 89#if _SQUID_WINDOWS_ || _SQUID_OS2_
62e76326 90 /*
91 * on some operating systems, you can not delete or rename
92 * open files, so we won't allow delayed close.
93 */
62e76326 94 while (!diskWriteIsComplete(fd))
95 diskHandleWrite(fd, NULL);
cd377065 96#else
be4d35dc 97 F->flags.close_request = true;
bf8fe701 98 debugs(6, 2, "file_close: FD " << fd << ", delaying close");
62e76326 99 PROF_stop(file_close);
62e76326 100 return;
cd377065 101#endif
62e76326 102
fb247d78 103 }
62e76326 104
65d548bf 105 /*
106 * Assert there is no write callback. Otherwise we might be
107 * leaking write state data by closing the descriptor
108 */
109 assert(F->write_handler == NULL);
62e76326 110
42f99d0d 111#if CALL_FSYNC_BEFORE_CLOSE
62e76326 112
42f99d0d 113 fsync(fd);
62e76326 114
42f99d0d 115#endif
62e76326 116
95d15928 117 close(fd);
62e76326 118
bf8fe701 119 debugs(6, F->flags.close_request ? 2 : 5, "file_close: FD " << fd << " really closing\n");
62e76326 120
6cf028ab 121 fd_close(fd);
62e76326 122
95dc7ff4 123 ++ statCounter.syscalls.disk.closes;
62e76326 124
88bfe092 125 PROF_stop(file_close);
090089c4 126}
127
f02b8498 128/*
129 * This function has the purpose of combining multiple writes. This is
130 * to facilitate the ASYNC_IO option since it can only guarantee 1
131 * write to a file per trip around the comm.c select() loop. That's bad
132 * because more than 1 write can be made to the access.log file per
133 * trip, and so this code is purely designed to help batch multiple
134 * sequential writes to the access.log file. Squid will never issue
135 * multiple writes for any other file type during 1 trip around the
136 * select() loop. --SLF
137 */
582b6456 138static void
5fed1735 139diskCombineWrites(_fde_disk *fdd)
090089c4 140{
f02b8498 141 /*
142 * We need to combine multiple write requests on an FD's write
143 * queue But only if we don't need to seek() in between them, ugh!
144 * XXX This currently ignores any seeks (file_offset)
145 */
62e76326 146
26ac0430 147 if (fdd->write_q != NULL && fdd->write_q->next != NULL) {
b115733c 148 int len = 0;
62e76326 149
b115733c 150 for (dwrite_q *q = fdd->write_q; q != NULL; q = q->next)
62e76326 151 len += q->len - q->buf_offset;
152
b115733c 153 dwrite_q *wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
62e76326 154
155 wq->buf = (char *)xmalloc(len);
156
157 wq->len = 0;
158
159 wq->buf_offset = 0;
160
161 wq->next = NULL;
162
59a09b98 163 wq->free_func = cxx_xfree;
62e76326 164
b115733c
AJ
165 while (fdd->write_q != NULL) {
166 dwrite_q *q = fdd->write_q;
167
62e76326 168 len = q->len - q->buf_offset;
41d00cd3 169 memcpy(wq->buf + wq->len, q->buf + q->buf_offset, len);
62e76326 170 wq->len += len;
171 fdd->write_q = q->next;
172
173 if (q->free_func)
6ca34f6f 174 q->free_func(q->buf);
62e76326 175
b115733c
AJ
176 memFree(q, MEM_DWRITE_Q);
177 };
62e76326 178
179 fdd->write_q_tail = wq;
180
181 fdd->write_q = wq;
0a0bf5db 182 }
f02b8498 183}
184
185/* write handler */
186static void
ced8def3 187diskHandleWrite(int fd, void *)
f02b8498 188{
189 int len = 0;
f02b8498 190 fde *F = &fd_table[fd];
62e76326 191
5fed1735 192 _fde_disk *fdd = &F->disk;
2391a162 193 dwrite_q *q = fdd->write_q;
194 int status = DISK_OK;
be4d35dc 195 bool do_close;
62e76326 196
2391a162 197 if (NULL == q)
62e76326 198 return;
199
88bfe092 200 PROF_start(diskHandleWrite);
62e76326 201
bf8fe701 202 debugs(6, 3, "diskHandleWrite: FD " << fd);
62e76326 203
be4d35dc 204 F->flags.write_daemon = false;
62e76326 205
8350fe9b 206 assert(fdd->write_q != NULL);
62e76326 207
d377699f 208 assert(fdd->write_q->len > fdd->write_q->buf_offset);
62e76326 209
e2851fe7
AR
210 debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " <<
211 (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes at " <<
212 fdd->write_q->file_offset);
62e76326 213
b870e0b4 214 errno = 0;
62e76326 215
914fbbba
FC
216 if (fdd->write_q->file_offset != -1) {
217 errno = 0;
218 if (lseek(fd, fdd->write_q->file_offset, SEEK_SET) == -1) {
3211fedb 219 debugs(50, DBG_IMPORTANT, "error in seek for fd " << fd << ": " << xstrerror());
914fbbba
FC
220 // XXX: handle error?
221 }
222 }
62e76326 223
1f7c9178 224 len = FD_WRITE_METHOD(fd,
62e76326 225 fdd->write_q->buf + fdd->write_q->buf_offset,
226 fdd->write_q->len - fdd->write_q->buf_offset);
227
bf8fe701 228 debugs(6, 3, "diskHandleWrite: FD " << fd << " len = " << len);
62e76326 229
95dc7ff4 230 ++ statCounter.syscalls.disk.writes;
62e76326 231
6cf028ab 232 fd_bytes(fd, len, FD_WRITE);
62e76326 233
0a0bf5db 234 if (len < 0) {
62e76326 235 if (!ignoreErrno(errno)) {
236 status = errno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR;
e0236918 237 debugs(50, DBG_IMPORTANT, "diskHandleWrite: FD " << fd << ": disk write error: " << xstrerror());
bf8fe701 238
62e76326 239 /*
240 * If there is no write callback, then this file is
241 * most likely something important like a log file, or
242 * an interprocess pipe. Its not a swapfile. We feel
243 * that a write failure on a log file is rather important,
244 * and Squid doesn't otherwise deal with this condition.
245 * So to get the administrators attention, we exit with
246 * a fatal message.
247 */
248
249 if (fdd->wrt_handle == NULL)
250 fatal("Write failure -- check your disk space and cache.log");
251
252 /*
253 * If there is a write failure, then we notify the
254 * upper layer via the callback, at the end of this
255 * function. Meanwhile, flush all pending buffers
256 * here. Let the upper layer decide how to handle the
257 * failure. This will prevent experiencing multiple,
258 * repeated write failures for the same FD because of
259 * the queued data.
260 */
261 do {
262 fdd->write_q = q->next;
263
264 if (q->free_func)
6ca34f6f 265 q->free_func(q->buf);
62e76326 266
267 if (q) {
268 memFree(q, MEM_DWRITE_Q);
269 q = NULL;
270 }
271 } while ((q = fdd->write_q));
272 }
273
274 len = 0;
0a0bf5db 275 }
62e76326 276
8350fe9b 277 if (q != NULL) {
62e76326 278 /* q might become NULL from write failure above */
279 q->buf_offset += len;
280
281 if (q->buf_offset > q->len)
e0236918 282 debugs(50, DBG_IMPORTANT, "diskHandleWriteComplete: q->buf_offset > q->len (" <<
bf8fe701 283 q << "," << (int) q->buf_offset << ", " << q->len << ", " <<
284 len << " FD " << fd << ")");
285
62e76326 286 assert(q->buf_offset <= q->len);
287
288 if (q->buf_offset == q->len) {
289 /* complete write */
290 fdd->write_q = q->next;
291
292 if (q->free_func)
6ca34f6f 293 q->free_func(q->buf);
62e76326 294
295 if (q) {
296 memFree(q, MEM_DWRITE_Q);
297 q = NULL;
298 }
299 }
090089c4 300 }
62e76326 301
de866d20 302 if (fdd->write_q == NULL) {
62e76326 303 /* no more data */
304 fdd->write_q_tail = NULL;
de866d20 305 } else {
62e76326 306 /* another block is queued */
307 diskCombineWrites(fdd);
d841c88d 308 Comm::SetSelect(fd, COMM_SELECT_WRITE, diskHandleWrite, NULL, 0);
be4d35dc 309 F->flags.write_daemon = true;
4a86108c 310 }
62e76326 311
0cd30ba5 312 do_close = F->flags.close_request;
62e76326 313
25354045 314 if (fdd->wrt_handle) {
62e76326 315 DWCB *callback = fdd->wrt_handle;
316 void *cbdata;
317 fdd->wrt_handle = NULL;
318
319 if (cbdataReferenceValidDone(fdd->wrt_handle_data, &cbdata)) {
320 callback(fd, status, len, cbdata);
321 /*
322 * NOTE, this callback can close the FD, so we must
323 * not touch 'F', 'fdd', etc. after this.
324 */
325 PROF_stop(diskHandleWrite);
326 return;
327 /* XXX But what about close_request??? */
328 }
25354045 329 }
62e76326 330
68c21f71 331 if (do_close)
62e76326 332 file_close(fd);
333
88bfe092 334 PROF_stop(diskHandleWrite);
090089c4 335}
336
090089c4 337/* write block to a file */
338/* write back queue. Only one writer at a time. */
339/* call a handle when writing is complete. */
e3ef2b09 340void
3ebcfaa1 341file_write(int fd,
62e76326 342 off_t file_offset,
343 void const *ptr_to_buf,
344 int len,
345 DWCB * handle,
346 void *handle_data,
347 FREE * free_func)
090089c4 348{
c6ac7aae 349 dwrite_q *wq = NULL;
48cc3fcf 350 fde *F = &fd_table[fd];
88bfe092 351 PROF_start(file_write);
48cc3fcf 352 assert(fd >= 0);
60c0b5a2 353 assert(F->flags.open);
090089c4 354 /* if we got here. Caller is eligible to write. */
e6ccf245 355 wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
d377699f 356 wq->file_offset = file_offset;
e6ccf245 357 wq->buf = (char *)ptr_to_buf;
090089c4 358 wq->len = len;
d377699f 359 wq->buf_offset = 0;
090089c4 360 wq->next = NULL;
ed7f0b6a 361 wq->free_func = free_func;
62e76326 362
fa80a8ef 363 if (!F->disk.wrt_handle_data) {
62e76326 364 F->disk.wrt_handle = handle;
365 F->disk.wrt_handle_data = cbdataReference(handle_data);
fa80a8ef 366 } else {
62e76326 367 /* Detect if there is multiple concurrent users of this fd.. we only support one callback */
368 assert(F->disk.wrt_handle_data == handle_data && F->disk.wrt_handle == handle);
fa80a8ef 369 }
62e76326 370
090089c4 371 /* add to queue */
48cc3fcf 372 if (F->disk.write_q == NULL) {
62e76326 373 /* empty queue */
374 F->disk.write_q = F->disk.write_q_tail = wq;
090089c4 375 } else {
62e76326 376 F->disk.write_q_tail->next = wq;
377 F->disk.write_q_tail = wq;
090089c4 378 }
62e76326 379
0cd30ba5 380 if (!F->flags.write_daemon) {
62e76326 381 diskHandleWrite(fd, NULL);
429fdbec 382 }
62e76326 383
88bfe092 384 PROF_stop(file_write);
090089c4 385}
386
23b2b404 387/*
388 * a wrapper around file_write to allow for MemBuf to be file_written
389 * in a snap
390 */
137ee196 391void
392file_write_mbuf(int fd, off_t off, MemBuf mb, DWCB * handler, void *handler_data)
393{
2fe7eff9 394 file_write(fd, off, mb.buf, mb.size, handler, handler_data, mb.freeFunc());
137ee196 395}
090089c4 396
397/* Read from FD */
582b6456 398static void
399diskHandleRead(int fd, void *data)
090089c4 400{
e6ccf245 401 dread_ctrl *ctrl_dat = (dread_ctrl *)data;
edd2eb63 402 fde *F = &fd_table[fd];
090089c4 403 int len;
2391a162 404 int rc = DISK_OK;
65d548bf 405 /*
406 * FD < 0 indicates premature close; we just have to free
407 * the state data.
408 */
62e76326 409
65d548bf 410 if (fd < 0) {
62e76326 411 memFree(ctrl_dat, MEM_DREAD_CTRL);
412 return;
65d548bf 413 }
62e76326 414
88bfe092 415 PROF_start(diskHandleRead);
62e76326 416
034b5ea4 417#if WRITES_MAINTAIN_DISK_OFFSET
711982d8 418 if (F->disk.offset != ctrl_dat->offset) {
034b5ea4
AR
419#else
420 {
421#endif
4a7a3d56 422 debugs(6, 3, "diskHandleRead: FD " << fd << " seeking to offset " << ctrl_dat->offset);
4ab1af1f
FC
423 errno = 0;
424 if (lseek(fd, ctrl_dat->offset, SEEK_SET) == -1) {
425 // shouldn't happen, let's detect that
3211fedb 426 debugs(50, DBG_IMPORTANT, "error in seek for fd " << fd << ": " << xstrerror());
4ab1af1f
FC
427 // XXX handle failures?
428 }
95dc7ff4 429 ++ statCounter.syscalls.disk.seeks;
62e76326 430 F->disk.offset = ctrl_dat->offset;
711982d8 431 }
62e76326 432
b870e0b4 433 errno = 0;
1f7c9178 434 len = FD_READ_METHOD(fd, ctrl_dat->buf, ctrl_dat->req_len);
62e76326 435
015b507a 436 if (len > 0)
62e76326 437 F->disk.offset += len;
438
95dc7ff4 439 ++ statCounter.syscalls.disk.reads;
62e76326 440
4f92c80c 441 fd_bytes(fd, len, FD_READ);
62e76326 442
0a0bf5db 443 if (len < 0) {
62e76326 444 if (ignoreErrno(errno)) {
d841c88d 445 Comm::SetSelect(fd, COMM_SELECT_READ, diskHandleRead, ctrl_dat, 0);
62e76326 446 PROF_stop(diskHandleRead);
447 return;
448 }
449
e0236918 450 debugs(50, DBG_IMPORTANT, "diskHandleRead: FD " << fd << ": " << xstrerror());
62e76326 451 len = 0;
452 rc = DISK_ERROR;
090089c4 453 } else if (len == 0) {
62e76326 454 rc = DISK_EOF;
090089c4 455 }
62e76326 456
fa80a8ef 457 if (cbdataReferenceValid(ctrl_dat->client_data))
62e76326 458 ctrl_dat->handler(fd, ctrl_dat->buf, len, rc, ctrl_dat->client_data);
459
fa80a8ef 460 cbdataReferenceDone(ctrl_dat->client_data);
62e76326 461
db1cd23c 462 memFree(ctrl_dat, MEM_DREAD_CTRL);
62e76326 463
88bfe092 464 PROF_stop(diskHandleRead);
090089c4 465}
466
090089c4 467/* start read operation */
62e76326 468/* buffer must be allocated from the caller.
26ac0430 469 * It must have at least req_len space in there.
090089c4 470 * call handler when a reading is complete. */
2391a162 471void
d377699f 472file_read(int fd, char *buf, int req_len, off_t offset, DRCB * handler, void *client_data)
090089c4 473{
474 dread_ctrl *ctrl_dat;
88bfe092 475 PROF_start(file_read);
711982d8 476 assert(fd >= 0);
e6ccf245 477 ctrl_dat = (dread_ctrl *)memAllocate(MEM_DREAD_CTRL);
090089c4 478 ctrl_dat->fd = fd;
479 ctrl_dat->offset = offset;
480 ctrl_dat->req_len = req_len;
481 ctrl_dat->buf = buf;
090089c4 482 ctrl_dat->end_of_file = 0;
483 ctrl_dat->handler = handler;
fa80a8ef 484 ctrl_dat->client_data = cbdataReference(client_data);
0a0bf5db 485 diskHandleRead(fd, ctrl_dat);
88bfe092 486 PROF_stop(file_read);
090089c4 487}
c8f4eac4 488
489void
490safeunlink(const char *s, int quiet)
491{
95dc7ff4 492 ++ statCounter.syscalls.disk.unlinks;
c8f4eac4 493
494 if (unlink(s) < 0 && !quiet)
e0236918 495 debugs(50, DBG_IMPORTANT, "safeunlink: Couldn't delete " << s << ": " << xstrerror());
c8f4eac4 496}
497
498/*
499 * Same as rename(2) but complains if something goes wrong;
26ac0430 500 * the caller is responsible for handing and explaining the
c8f4eac4 501 * consequences of errors.
502 */
503int
504xrename(const char *from, const char *to)
505{
bf8fe701 506 debugs(21, 2, "xrename: renaming " << from << " to " << to);
be266cb2 507#if _SQUID_OS2_ || _SQUID_WINDOWS_
6ca34f6f 508 remove(to);
c8f4eac4 509#endif
510
511 if (0 == rename(from, to))
512 return 0;
513
bf8fe701 514 debugs(21, errno == ENOENT ? 2 : 1, "xrename: Cannot rename " << from << " to " << to << ": " << xstrerror());
c8f4eac4 515
516 return -1;
517}
518
2745fea5
AR
519int
520fsBlockSize(const char *path, int *blksize)
521{
522 struct statvfs sfs;
523
524 if (xstatvfs(path, &sfs)) {
525 debugs(50, DBG_IMPORTANT, "" << path << ": " << xstrerror());
526 *blksize = 2048;
527 return 1;
528 }
529
530 *blksize = (int) sfs.f_frsize;
531
532 // Sanity check; make sure we have a meaningful value.
533 if (*blksize < 512)
534 *blksize = 2048;
535
536 return 0;
537}
538
/*
 * Converts 'num' blocks of size 'fsbs' into blocks of size 'bs' using
 * integer arithmetic. A zero 'fsbs' yields 0.
 */
#define fsbtoblk(num, fsbs, bs) \
    (((fsbs) == 0 || (fsbs) >= (bs)) ? \
     (num) * ((fsbs) / (bs)) : \
     (num) / ((bs) / (fsbs)))
542int
543fsStats(const char *path, int *totl_kb, int *free_kb, int *totl_in, int *free_in)
544{
545 struct statvfs sfs;
546
547 if (xstatvfs(path, &sfs)) {
548 debugs(50, DBG_IMPORTANT, "" << path << ": " << xstrerror());
549 return 1;
550 }
551
552 *totl_kb = (int) fsbtoblk(sfs.f_blocks, sfs.f_frsize, 1024);
553 *free_kb = (int) fsbtoblk(sfs.f_bfree, sfs.f_frsize, 1024);
554 *totl_in = (int) sfs.f_files;
555 *free_in = (int) sfs.f_ffree;
556 return 0;
557}