]> git.ipfire.org Git - thirdparty/squid.git/blame - src/fs_io.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / fs_io.cc
CommitLineData
30a4f2a8 1/*
4ac4a490 2 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
e25c139f 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
30a4f2a8 7 */
ed43818f 8
bbc27441
AJ
9/* DEBUG: section 06 Disk I/O Routines */
10
582c2af2 11#include "squid.h"
d841c88d 12#include "comm/Loops.h"
c4ad1349 13#include "fd.h"
528b2c61 14#include "fde.h"
b3f7fd88 15#include "fs_io.h"
67679543 16#include "globals.h"
0eb49b6d 17#include "MemBuf.h"
582c2af2 18#include "profiler/Profiler.h"
e4f1fdae 19#include "StatCounters.h"
090089c4 20
1a30fdf5 21#include <cerrno>
21d845b1 22
95d15928 23static PF diskHandleRead;
24static PF diskHandleWrite;
24382924 25
#if _SQUID_WINDOWS_ || _SQUID_OS2_
/*
 * Reports whether the disk write queue of 'fd' has been drained.
 * Returns 1 when no write request is queued, 0 otherwise.
 */
static int
diskWriteIsComplete(int fd)
{
    if (fd_table[fd].disk.write_q)
        return 0;

    return 1;
}

#endif
34
59a09b98
FC
35/* hack needed on SunStudio to avoid linkage convention mismatch */
36static void cxx_xfree(void *ptr)
37{
f673997d 38 xfree(ptr);
59a09b98
FC
39}
40
1a94f598 41/*
42 * opens a disk file specified by 'path'. This function always
43 * blocks! There is no callback.
44 */
684c2720 45int
1a94f598 46file_open(const char *path, int mode)
090089c4 47{
090089c4 48 int fd;
88bfe092 49 PROF_start(file_open);
62e76326 50
5d1a7121 51 if (FILE_MODE(mode) == O_WRONLY)
62e76326 52 mode |= O_APPEND;
53
b870e0b4 54 errno = 0;
62e76326 55
0a0bf5db 56 fd = open(path, mode, 0644);
62e76326 57
95dc7ff4 58 ++ statCounter.syscalls.disk.opens;
62e76326 59
0a0bf5db 60 if (fd < 0) {
b69e9ffa
AJ
61 int xerrno = errno;
62 debugs(50, 3, "error opening file " << path << ": " << xstrerr(xerrno));
62e76326 63 fd = DISK_ERROR;
2391a162 64 } else {
b69e9ffa 65 debugs(6, 5, "FD " << fd);
62e76326 66 commSetCloseOnExec(fd);
67 fd_open(fd, FD_FILE, path);
090089c4 68 }
62e76326 69
88bfe092 70 PROF_stop(file_open);
2391a162 71 return fd;
0a0bf5db 72}
73
090089c4 74/* close a disk file. */
95d15928 75void
b8d8561b 76file_close(int fd)
090089c4 77{
76f87348 78 fde *F = &fd_table[fd];
2391a162 79 PF *read_callback;
88bfe092 80 PROF_start(file_close);
25354045 81 assert(fd >= 0);
60c0b5a2 82 assert(F->flags.open);
62e76326 83
2391a162 84 if ((read_callback = F->read_handler)) {
62e76326 85 F->read_handler = NULL;
86 read_callback(-1, F->read_data);
65d548bf 87 }
62e76326 88
0cd30ba5 89 if (F->flags.write_daemon) {
be266cb2 90#if _SQUID_WINDOWS_ || _SQUID_OS2_
62e76326 91 /*
92 * on some operating systems, you can not delete or rename
93 * open files, so we won't allow delayed close.
94 */
62e76326 95 while (!diskWriteIsComplete(fd))
96 diskHandleWrite(fd, NULL);
cd377065 97#else
be4d35dc 98 F->flags.close_request = true;
bf8fe701 99 debugs(6, 2, "file_close: FD " << fd << ", delaying close");
62e76326 100 PROF_stop(file_close);
62e76326 101 return;
cd377065 102#endif
62e76326 103
fb247d78 104 }
62e76326 105
65d548bf 106 /*
107 * Assert there is no write callback. Otherwise we might be
108 * leaking write state data by closing the descriptor
109 */
110 assert(F->write_handler == NULL);
62e76326 111
42f99d0d 112#if CALL_FSYNC_BEFORE_CLOSE
62e76326 113
42f99d0d 114 fsync(fd);
62e76326 115
42f99d0d 116#endif
62e76326 117
95d15928 118 close(fd);
62e76326 119
bf8fe701 120 debugs(6, F->flags.close_request ? 2 : 5, "file_close: FD " << fd << " really closing\n");
62e76326 121
6cf028ab 122 fd_close(fd);
62e76326 123
95dc7ff4 124 ++ statCounter.syscalls.disk.closes;
62e76326 125
88bfe092 126 PROF_stop(file_close);
090089c4 127}
128
f02b8498 129/*
130 * This function has the purpose of combining multiple writes. This is
131 * to facilitate the ASYNC_IO option since it can only guarantee 1
132 * write to a file per trip around the comm.c select() loop. That's bad
133 * because more than 1 write can be made to the access.log file per
134 * trip, and so this code is purely designed to help batch multiple
135 * sequential writes to the access.log file. Squid will never issue
136 * multiple writes for any other file type during 1 trip around the
137 * select() loop. --SLF
138 */
582b6456 139static void
5fed1735 140diskCombineWrites(_fde_disk *fdd)
090089c4 141{
f02b8498 142 /*
143 * We need to combine multiple write requests on an FD's write
144 * queue But only if we don't need to seek() in between them, ugh!
145 * XXX This currently ignores any seeks (file_offset)
146 */
62e76326 147
26ac0430 148 if (fdd->write_q != NULL && fdd->write_q->next != NULL) {
b115733c 149 int len = 0;
62e76326 150
b115733c 151 for (dwrite_q *q = fdd->write_q; q != NULL; q = q->next)
62e76326 152 len += q->len - q->buf_offset;
153
b115733c 154 dwrite_q *wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
62e76326 155
156 wq->buf = (char *)xmalloc(len);
157
158 wq->len = 0;
159
160 wq->buf_offset = 0;
161
162 wq->next = NULL;
163
59a09b98 164 wq->free_func = cxx_xfree;
62e76326 165
b115733c
AJ
166 while (fdd->write_q != NULL) {
167 dwrite_q *q = fdd->write_q;
168
62e76326 169 len = q->len - q->buf_offset;
41d00cd3 170 memcpy(wq->buf + wq->len, q->buf + q->buf_offset, len);
62e76326 171 wq->len += len;
172 fdd->write_q = q->next;
173
174 if (q->free_func)
6ca34f6f 175 q->free_func(q->buf);
62e76326 176
b115733c
AJ
177 memFree(q, MEM_DWRITE_Q);
178 };
62e76326 179
180 fdd->write_q_tail = wq;
181
182 fdd->write_q = wq;
0a0bf5db 183 }
f02b8498 184}
185
186/* write handler */
187static void
ced8def3 188diskHandleWrite(int fd, void *)
f02b8498 189{
190 int len = 0;
f02b8498 191 fde *F = &fd_table[fd];
62e76326 192
5fed1735 193 _fde_disk *fdd = &F->disk;
2391a162 194 dwrite_q *q = fdd->write_q;
195 int status = DISK_OK;
be4d35dc 196 bool do_close;
62e76326 197
2391a162 198 if (NULL == q)
62e76326 199 return;
200
88bfe092 201 PROF_start(diskHandleWrite);
62e76326 202
bf8fe701 203 debugs(6, 3, "diskHandleWrite: FD " << fd);
62e76326 204
be4d35dc 205 F->flags.write_daemon = false;
62e76326 206
8350fe9b 207 assert(fdd->write_q != NULL);
62e76326 208
d377699f 209 assert(fdd->write_q->len > fdd->write_q->buf_offset);
62e76326 210
e2851fe7
AR
211 debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " <<
212 (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes at " <<
213 fdd->write_q->file_offset);
62e76326 214
b870e0b4 215 errno = 0;
62e76326 216
914fbbba
FC
217 if (fdd->write_q->file_offset != -1) {
218 errno = 0;
219 if (lseek(fd, fdd->write_q->file_offset, SEEK_SET) == -1) {
b69e9ffa
AJ
220 int xerrno = errno;
221 debugs(50, DBG_IMPORTANT, "error in seek for FD " << fd << ": " << xstrerr(xerrno));
914fbbba
FC
222 // XXX: handle error?
223 }
224 }
62e76326 225
1f7c9178 226 len = FD_WRITE_METHOD(fd,
62e76326 227 fdd->write_q->buf + fdd->write_q->buf_offset,
228 fdd->write_q->len - fdd->write_q->buf_offset);
229
bf8fe701 230 debugs(6, 3, "diskHandleWrite: FD " << fd << " len = " << len);
62e76326 231
95dc7ff4 232 ++ statCounter.syscalls.disk.writes;
62e76326 233
6cf028ab 234 fd_bytes(fd, len, FD_WRITE);
62e76326 235
0a0bf5db 236 if (len < 0) {
62e76326 237 if (!ignoreErrno(errno)) {
238 status = errno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR;
b69e9ffa
AJ
239 int xerrno = errno;
240 debugs(50, DBG_IMPORTANT, "diskHandleWrite: FD " << fd << ": disk write error: " << xstrerr(xerrno));
bf8fe701 241
62e76326 242 /*
243 * If there is no write callback, then this file is
244 * most likely something important like a log file, or
245 * an interprocess pipe. Its not a swapfile. We feel
246 * that a write failure on a log file is rather important,
247 * and Squid doesn't otherwise deal with this condition.
248 * So to get the administrators attention, we exit with
249 * a fatal message.
250 */
251
252 if (fdd->wrt_handle == NULL)
253 fatal("Write failure -- check your disk space and cache.log");
254
255 /*
256 * If there is a write failure, then we notify the
257 * upper layer via the callback, at the end of this
258 * function. Meanwhile, flush all pending buffers
259 * here. Let the upper layer decide how to handle the
260 * failure. This will prevent experiencing multiple,
261 * repeated write failures for the same FD because of
262 * the queued data.
263 */
264 do {
265 fdd->write_q = q->next;
266
267 if (q->free_func)
6ca34f6f 268 q->free_func(q->buf);
62e76326 269
270 if (q) {
271 memFree(q, MEM_DWRITE_Q);
272 q = NULL;
273 }
274 } while ((q = fdd->write_q));
275 }
276
277 len = 0;
0a0bf5db 278 }
62e76326 279
8350fe9b 280 if (q != NULL) {
62e76326 281 /* q might become NULL from write failure above */
282 q->buf_offset += len;
283
284 if (q->buf_offset > q->len)
e0236918 285 debugs(50, DBG_IMPORTANT, "diskHandleWriteComplete: q->buf_offset > q->len (" <<
bf8fe701 286 q << "," << (int) q->buf_offset << ", " << q->len << ", " <<
287 len << " FD " << fd << ")");
288
62e76326 289 assert(q->buf_offset <= q->len);
290
291 if (q->buf_offset == q->len) {
292 /* complete write */
293 fdd->write_q = q->next;
294
295 if (q->free_func)
6ca34f6f 296 q->free_func(q->buf);
62e76326 297
298 if (q) {
299 memFree(q, MEM_DWRITE_Q);
300 q = NULL;
301 }
302 }
090089c4 303 }
62e76326 304
de866d20 305 if (fdd->write_q == NULL) {
62e76326 306 /* no more data */
307 fdd->write_q_tail = NULL;
de866d20 308 } else {
62e76326 309 /* another block is queued */
310 diskCombineWrites(fdd);
d841c88d 311 Comm::SetSelect(fd, COMM_SELECT_WRITE, diskHandleWrite, NULL, 0);
be4d35dc 312 F->flags.write_daemon = true;
4a86108c 313 }
62e76326 314
0cd30ba5 315 do_close = F->flags.close_request;
62e76326 316
25354045 317 if (fdd->wrt_handle) {
62e76326 318 DWCB *callback = fdd->wrt_handle;
319 void *cbdata;
320 fdd->wrt_handle = NULL;
321
322 if (cbdataReferenceValidDone(fdd->wrt_handle_data, &cbdata)) {
323 callback(fd, status, len, cbdata);
324 /*
325 * NOTE, this callback can close the FD, so we must
326 * not touch 'F', 'fdd', etc. after this.
327 */
328 PROF_stop(diskHandleWrite);
329 return;
330 /* XXX But what about close_request??? */
331 }
25354045 332 }
62e76326 333
68c21f71 334 if (do_close)
62e76326 335 file_close(fd);
336
88bfe092 337 PROF_stop(diskHandleWrite);
090089c4 338}
339
090089c4 340/* write block to a file */
341/* write back queue. Only one writer at a time. */
342/* call a handle when writing is complete. */
e3ef2b09 343void
3ebcfaa1 344file_write(int fd,
62e76326 345 off_t file_offset,
346 void const *ptr_to_buf,
347 int len,
348 DWCB * handle,
349 void *handle_data,
350 FREE * free_func)
090089c4 351{
c6ac7aae 352 dwrite_q *wq = NULL;
48cc3fcf 353 fde *F = &fd_table[fd];
88bfe092 354 PROF_start(file_write);
48cc3fcf 355 assert(fd >= 0);
60c0b5a2 356 assert(F->flags.open);
090089c4 357 /* if we got here. Caller is eligible to write. */
e6ccf245 358 wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
d377699f 359 wq->file_offset = file_offset;
e6ccf245 360 wq->buf = (char *)ptr_to_buf;
090089c4 361 wq->len = len;
d377699f 362 wq->buf_offset = 0;
090089c4 363 wq->next = NULL;
ed7f0b6a 364 wq->free_func = free_func;
62e76326 365
fa80a8ef 366 if (!F->disk.wrt_handle_data) {
62e76326 367 F->disk.wrt_handle = handle;
368 F->disk.wrt_handle_data = cbdataReference(handle_data);
fa80a8ef 369 } else {
62e76326 370 /* Detect if there is multiple concurrent users of this fd.. we only support one callback */
371 assert(F->disk.wrt_handle_data == handle_data && F->disk.wrt_handle == handle);
fa80a8ef 372 }
62e76326 373
090089c4 374 /* add to queue */
48cc3fcf 375 if (F->disk.write_q == NULL) {
62e76326 376 /* empty queue */
377 F->disk.write_q = F->disk.write_q_tail = wq;
090089c4 378 } else {
62e76326 379 F->disk.write_q_tail->next = wq;
380 F->disk.write_q_tail = wq;
090089c4 381 }
62e76326 382
0cd30ba5 383 if (!F->flags.write_daemon) {
62e76326 384 diskHandleWrite(fd, NULL);
429fdbec 385 }
62e76326 386
88bfe092 387 PROF_stop(file_write);
090089c4 388}
389
23b2b404 390/*
391 * a wrapper around file_write to allow for MemBuf to be file_written
392 * in a snap
393 */
137ee196 394void
395file_write_mbuf(int fd, off_t off, MemBuf mb, DWCB * handler, void *handler_data)
396{
2fe7eff9 397 file_write(fd, off, mb.buf, mb.size, handler, handler_data, mb.freeFunc());
137ee196 398}
090089c4 399
400/* Read from FD */
582b6456 401static void
402diskHandleRead(int fd, void *data)
090089c4 403{
e6ccf245 404 dread_ctrl *ctrl_dat = (dread_ctrl *)data;
edd2eb63 405 fde *F = &fd_table[fd];
090089c4 406 int len;
2391a162 407 int rc = DISK_OK;
b69e9ffa
AJ
408 int xerrno;
409
65d548bf 410 /*
411 * FD < 0 indicates premature close; we just have to free
412 * the state data.
413 */
62e76326 414
65d548bf 415 if (fd < 0) {
62e76326 416 memFree(ctrl_dat, MEM_DREAD_CTRL);
417 return;
65d548bf 418 }
62e76326 419
88bfe092 420 PROF_start(diskHandleRead);
62e76326 421
034b5ea4 422#if WRITES_MAINTAIN_DISK_OFFSET
711982d8 423 if (F->disk.offset != ctrl_dat->offset) {
034b5ea4
AR
424#else
425 {
426#endif
4a7a3d56 427 debugs(6, 3, "diskHandleRead: FD " << fd << " seeking to offset " << ctrl_dat->offset);
4ab1af1f
FC
428 errno = 0;
429 if (lseek(fd, ctrl_dat->offset, SEEK_SET) == -1) {
b69e9ffa 430 xerrno = errno;
4ab1af1f 431 // shouldn't happen, let's detect that
b69e9ffa 432 debugs(50, DBG_IMPORTANT, "error in seek for FD " << fd << ": " << xstrerr(xerrno));
4ab1af1f
FC
433 // XXX handle failures?
434 }
95dc7ff4 435 ++ statCounter.syscalls.disk.seeks;
62e76326 436 F->disk.offset = ctrl_dat->offset;
711982d8 437 }
62e76326 438
b870e0b4 439 errno = 0;
1f7c9178 440 len = FD_READ_METHOD(fd, ctrl_dat->buf, ctrl_dat->req_len);
b69e9ffa 441 xerrno = errno;
62e76326 442
015b507a 443 if (len > 0)
62e76326 444 F->disk.offset += len;
445
95dc7ff4 446 ++ statCounter.syscalls.disk.reads;
62e76326 447
4f92c80c 448 fd_bytes(fd, len, FD_READ);
62e76326 449
0a0bf5db 450 if (len < 0) {
b69e9ffa 451 if (ignoreErrno(xerrno)) {
d841c88d 452 Comm::SetSelect(fd, COMM_SELECT_READ, diskHandleRead, ctrl_dat, 0);
62e76326 453 PROF_stop(diskHandleRead);
454 return;
455 }
456
b69e9ffa 457 debugs(50, DBG_IMPORTANT, "diskHandleRead: FD " << fd << ": " << xstrerr(xerrno));
62e76326 458 len = 0;
459 rc = DISK_ERROR;
090089c4 460 } else if (len == 0) {
62e76326 461 rc = DISK_EOF;
090089c4 462 }
62e76326 463
fa80a8ef 464 if (cbdataReferenceValid(ctrl_dat->client_data))
62e76326 465 ctrl_dat->handler(fd, ctrl_dat->buf, len, rc, ctrl_dat->client_data);
466
fa80a8ef 467 cbdataReferenceDone(ctrl_dat->client_data);
62e76326 468
db1cd23c 469 memFree(ctrl_dat, MEM_DREAD_CTRL);
62e76326 470
88bfe092 471 PROF_stop(diskHandleRead);
090089c4 472}
473
090089c4 474/* start read operation */
62e76326 475/* buffer must be allocated from the caller.
26ac0430 476 * It must have at least req_len space in there.
090089c4 477 * call handler when a reading is complete. */
2391a162 478void
d377699f 479file_read(int fd, char *buf, int req_len, off_t offset, DRCB * handler, void *client_data)
090089c4 480{
481 dread_ctrl *ctrl_dat;
88bfe092 482 PROF_start(file_read);
711982d8 483 assert(fd >= 0);
e6ccf245 484 ctrl_dat = (dread_ctrl *)memAllocate(MEM_DREAD_CTRL);
090089c4 485 ctrl_dat->fd = fd;
486 ctrl_dat->offset = offset;
487 ctrl_dat->req_len = req_len;
488 ctrl_dat->buf = buf;
090089c4 489 ctrl_dat->end_of_file = 0;
490 ctrl_dat->handler = handler;
fa80a8ef 491 ctrl_dat->client_data = cbdataReference(client_data);
0a0bf5db 492 diskHandleRead(fd, ctrl_dat);
88bfe092 493 PROF_stop(file_read);
090089c4 494}
c8f4eac4 495
496void
497safeunlink(const char *s, int quiet)
498{
95dc7ff4 499 ++ statCounter.syscalls.disk.unlinks;
c8f4eac4 500
b69e9ffa
AJ
501 if (unlink(s) < 0 && !quiet) {
502 int xerrno = errno;
503 debugs(50, DBG_IMPORTANT, "safeunlink: Couldn't delete " << s << ": " << xstrerr(xerrno));
504 }
c8f4eac4 505}
506
507/*
508 * Same as rename(2) but complains if something goes wrong;
26ac0430 509 * the caller is responsible for handing and explaining the
c8f4eac4 510 * consequences of errors.
511 */
512int
513xrename(const char *from, const char *to)
514{
bf8fe701 515 debugs(21, 2, "xrename: renaming " << from << " to " << to);
be266cb2 516#if _SQUID_OS2_ || _SQUID_WINDOWS_
6ca34f6f 517 remove(to);
c8f4eac4 518#endif
519
520 if (0 == rename(from, to))
521 return 0;
522
b69e9ffa
AJ
523 int xerrno = errno;
524 debugs(21, errno == ENOENT ? 2 : 1, "xrename: Cannot rename " << from << " to " << to << ": " << xstrerr(xerrno));
c8f4eac4 525
526 return -1;
527}
528
2745fea5
AR
529int
530fsBlockSize(const char *path, int *blksize)
531{
532 struct statvfs sfs;
533
534 if (xstatvfs(path, &sfs)) {
b69e9ffa
AJ
535 int xerrno = errno;
536 debugs(50, DBG_IMPORTANT, "" << path << ": " << xstrerr(xerrno));
2745fea5
AR
537 *blksize = 2048;
538 return 1;
539 }
540
541 *blksize = (int) sfs.f_frsize;
542
543 // Sanity check; make sure we have a meaningful value.
544 if (*blksize < 512)
545 *blksize = 2048;
546
547 return 0;
548}
549
550#define fsbtoblk(num, fsbs, bs) \
551 (((fsbs) != 0 && (fsbs) < (bs)) ? \
552 (num) / ((bs) / (fsbs)) : (num) * ((fsbs) / (bs)))
553int
554fsStats(const char *path, int *totl_kb, int *free_kb, int *totl_in, int *free_in)
555{
556 struct statvfs sfs;
557
558 if (xstatvfs(path, &sfs)) {
b69e9ffa
AJ
559 int xerrno = errno;
560 debugs(50, DBG_IMPORTANT, "" << path << ": " << xstrerr(xerrno));
2745fea5
AR
561 return 1;
562 }
563
564 *totl_kb = (int) fsbtoblk(sfs.f_blocks, sfs.f_frsize, 1024);
565 *free_kb = (int) fsbtoblk(sfs.f_bfree, sfs.f_frsize, 1024);
566 *totl_in = (int) sfs.f_files;
567 *free_in = (int) sfs.f_ffree;
568 return 0;
569}
7d84d4ca 570