src/fs_io.cc

   1 /*
   2  * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 /* DEBUG: section 06    Disk I/O Routines */
  10
#include "squid.h"
#include "comm/Loops.h"
#include "fd.h"
#include "fde.h"
#include "fs_io.h"
#include "globals.h"
#include "MemBuf.h"
#include "StatCounters.h"

#include <cerrno>
#include <climits>
  21
  22 static PF diskHandleRead;
  23 static PF diskHandleWrite;
  24
  25 #if _SQUID_WINDOWS_ || _SQUID_OS2_
  26 static int
  27 diskWriteIsComplete(int fd)
  28 {
  29     return fd_table[fd].disk.write_q ? 0 : 1;
  30 }
  31
  32 #endif
  33
  34 /* hack needed on SunStudio to avoid linkage convention mismatch */
  35 static void cxx_xfree(void *ptr)
  36 {
  37     xfree(ptr);
  38 }
  39
  40 /*
  41  * opens a disk file specified by 'path'.  This function always
  42  * blocks!  There is no callback.
  43  */
  44 int
  45 file_open(const char *path, int mode)
  46 {
  47     int fd;
  48
  49     if (FILE_MODE(mode) == O_WRONLY)
  50         mode |= O_APPEND;
  51
  52     errno = 0;
  53
  54     fd = open(path, mode, 0644);
  55
  56     ++ statCounter.syscalls.disk.opens;
  57
  58     if (fd < 0) {
  59         int xerrno = errno;
  60         debugs(50, 3, "error opening file " << path << ": " << xstrerr(xerrno));
  61         fd = DISK_ERROR;
  62     } else {
  63         debugs(6, 5, "FD " << fd);
  64         commSetCloseOnExec(fd);
  65         fd_open(fd, FD_FILE, path);
  66     }
  67
  68     return fd;
  69 }
  70
  71 /* close a disk file. */
  72 void
  73 file_close(int fd)
  74 {
  75     fde *F = &fd_table[fd];
  76     PF *read_callback;
  77     assert(fd >= 0);
  78     assert(F->flags.open);
  79
  80     if ((read_callback = F->read_handler)) {
  81         F->read_handler = nullptr;
  82         read_callback(-1, F->read_data);
  83     }
  84
  85     if (F->flags.write_daemon) {
  86 #if _SQUID_WINDOWS_ || _SQUID_OS2_
  87         /*
  88          * on some operating systems, you can not delete or rename
  89          * open files, so we won't allow delayed close.
  90          */
  91         while (!diskWriteIsComplete(fd))
  92             diskHandleWrite(fd, NULL);
  93 #else
  94         F->flags.close_request = true;
  95         debugs(6, 2, "file_close: FD " << fd << ", delaying close");
  96         return;
  97 #endif
  98
  99     }
 100
 101     /*
 102      * Assert there is no write callback.  Otherwise we might be
 103      * leaking write state data by closing the descriptor
 104      */
 105     assert(F->write_handler == nullptr);
 106
 107     close(fd);
 108
 109     debugs(6, F->flags.close_request ? 2 : 5, "file_close: FD " << fd << " really closing");
 110
 111     fd_close(fd);
 112
 113     ++ statCounter.syscalls.disk.closes;
 114 }
 115
 116 /*
 117  * This function has the purpose of combining multiple writes.  This is
 118  * to facilitate the ASYNC_IO option since it can only guarantee 1
 119  * write to a file per trip around the comm.c select() loop. That's bad
 120  * because more than 1 write can be made to the access.log file per
 121  * trip, and so this code is purely designed to help batch multiple
 122  * sequential writes to the access.log file.  Squid will never issue
 123  * multiple writes for any other file type during 1 trip around the
 124  * select() loop.       --SLF
 125  */
 126 static void
 127 diskCombineWrites(_fde_disk *fdd)
 128 {
 129     /*
 130      * We need to combine multiple write requests on an FD's write
 131      * queue But only if we don't need to seek() in between them, ugh!
 132      * XXX This currently ignores any seeks (file_offset)
 133      */
 134
 135     if (fdd->write_q != nullptr && fdd->write_q->next != nullptr) {
 136         int len = 0;
 137
 138         for (dwrite_q *q = fdd->write_q; q != nullptr; q = q->next)
 139             len += q->len - q->buf_offset;
 140
 141         dwrite_q *wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
 142
 143         wq->buf = (char *)xmalloc(len);
 144
 145         wq->len = 0;
 146
 147         wq->buf_offset = 0;
 148
 149         wq->next = nullptr;
 150
 151         wq->free_func = cxx_xfree;
 152
 153         while (fdd->write_q != nullptr) {
 154             dwrite_q *q = fdd->write_q;
 155
 156             len = q->len - q->buf_offset;
 157             memcpy(wq->buf + wq->len, q->buf + q->buf_offset, len);
 158             wq->len += len;
 159             fdd->write_q = q->next;
 160
 161             if (q->free_func)
 162                 q->free_func(q->buf);
 163
 164             memFree(q, MEM_DWRITE_Q);
 165         };
 166
 167         fdd->write_q_tail = wq;
 168
 169         fdd->write_q = wq;
 170     }
 171 }
 172
 173 /* write handler */
 174 static void
 175 diskHandleWrite(int fd, void *)
 176 {
 177     int len = 0;
 178     fde *F = &fd_table[fd];
 179
 180     _fde_disk *fdd = &F->disk;
 181     dwrite_q *q = fdd->write_q;
 182     int status = DISK_OK;
 183     bool do_close;
 184
 185     if (nullptr == q)
 186         return;
 187
 188     debugs(6, 3, "diskHandleWrite: FD " << fd);
 189
 190     F->flags.write_daemon = false;
 191
 192     assert(fdd->write_q != nullptr);
 193
 194     assert(fdd->write_q->len > fdd->write_q->buf_offset);
 195
 196     debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " <<
 197            (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes at " <<
 198            fdd->write_q->file_offset);
 199
 200     errno = 0;
 201
 202     if (fdd->write_q->file_offset != -1) {
 203         errno = 0;
 204         if (lseek(fd, fdd->write_q->file_offset, SEEK_SET) == -1) {
 205             int xerrno = errno;
 206             debugs(50, DBG_IMPORTANT, "ERROR: in seek for FD " << fd << ": " << xstrerr(xerrno));
 207             // XXX: handle error?
 208         }
 209     }
 210
 211     len = FD_WRITE_METHOD(fd,
 212                           fdd->write_q->buf + fdd->write_q->buf_offset,
 213                           fdd->write_q->len - fdd->write_q->buf_offset);
 214     const auto xerrno = errno;
 215
 216     debugs(6, 3, "diskHandleWrite: FD " << fd << " len = " << len);
 217
 218     ++ statCounter.syscalls.disk.writes;
 219
 220     fd_bytes(fd, len, FD_WRITE);
 221
 222     if (len < 0) {
 223         if (!ignoreErrno(xerrno)) {
 224             status = xerrno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR;
 225             debugs(50, DBG_IMPORTANT, "ERROR: diskHandleWrite: FD " << fd << ": disk write failure: " << xstrerr(xerrno));
 226
 227             /*
 228              * If there is no write callback, then this file is
 229              * most likely something important like a log file, or
 230              * an interprocess pipe.  Its not a swapfile.  We feel
 231              * that a write failure on a log file is rather important,
 232              * and Squid doesn't otherwise deal with this condition.
 233              * So to get the administrators attention, we exit with
 234              * a fatal message.
 235              */
 236
 237             if (fdd->wrt_handle == nullptr)
 238                 fatal("Write failure -- check your disk space and cache.log");
 239
 240             /*
 241              * If there is a write failure, then we notify the
 242              * upper layer via the callback, at the end of this
 243              * function.  Meanwhile, flush all pending buffers
 244              * here.  Let the upper layer decide how to handle the
 245              * failure.  This will prevent experiencing multiple,
 246              * repeated write failures for the same FD because of
 247              * the queued data.
 248              */
 249             do {
 250                 fdd->write_q = q->next;
 251
 252                 if (q->free_func)
 253                     q->free_func(q->buf);
 254
 255                 if (q) {
 256                     memFree(q, MEM_DWRITE_Q);
 257                     q = nullptr;
 258                 }
 259             } while ((q = fdd->write_q));
 260         }
 261
 262         len = 0;
 263     }
 264
 265     if (q != nullptr) {
 266         /* q might become NULL from write failure above */
 267         q->buf_offset += len;
 268
 269         if (q->buf_offset > q->len)
 270             debugs(50, DBG_IMPORTANT, "diskHandleWriteComplete: q->buf_offset > q->len (" <<
 271                    q << "," << (int) q->buf_offset << ", " << q->len << ", " <<
 272                    len << " FD " << fd << ")");
 273
 274         assert(q->buf_offset <= q->len);
 275
 276         if (q->buf_offset == q->len) {
 277             /* complete write */
 278             fdd->write_q = q->next;
 279
 280             if (q->free_func)
 281                 q->free_func(q->buf);
 282
 283             if (q) {
 284                 memFree(q, MEM_DWRITE_Q);
 285                 q = nullptr;
 286             }
 287         }
 288     }
 289
 290     if (fdd->write_q == nullptr) {
 291         /* no more data */
 292         fdd->write_q_tail = nullptr;
 293     } else {
 294         /* another block is queued */
 295         diskCombineWrites(fdd);
 296         Comm::SetSelect(fd, COMM_SELECT_WRITE, diskHandleWrite, nullptr, 0);
 297         F->flags.write_daemon = true;
 298     }
 299
 300     do_close = F->flags.close_request;
 301
 302     if (fdd->wrt_handle) {
 303         DWCB *callback = fdd->wrt_handle;
 304         void *cbdata;
 305         fdd->wrt_handle = nullptr;
 306
 307         if (cbdataReferenceValidDone(fdd->wrt_handle_data, &cbdata)) {
 308             callback(fd, status, len, cbdata);
 309             /*
 310              * NOTE, this callback can close the FD, so we must
 311              * not touch 'F', 'fdd', etc. after this.
 312              */
 313             return;
 314             /* XXX But what about close_request??? */
 315         }
 316     }
 317
 318     if (do_close)
 319         file_close(fd);
 320 }
 321
 322 /* write block to a file */
 323 /* write back queue. Only one writer at a time. */
 324 /* call a handle when writing is complete. */
 325 void
 326 file_write(int fd,
 327            off_t file_offset,
 328            void const *ptr_to_buf,
 329            int len,
 330            DWCB * handle,
 331            void *handle_data,
 332            FREE * free_func)
 333 {
 334     dwrite_q *wq = nullptr;
 335     fde *F = &fd_table[fd];
 336     assert(fd >= 0);
 337     assert(F->flags.open);
 338     /* if we got here. Caller is eligible to write. */
 339     wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
 340     wq->file_offset = file_offset;
 341     wq->buf = (char *)ptr_to_buf;
 342     wq->len = len;
 343     wq->buf_offset = 0;
 344     wq->next = nullptr;
 345     wq->free_func = free_func;
 346
 347     if (!F->disk.wrt_handle_data) {
 348         F->disk.wrt_handle = handle;
 349         F->disk.wrt_handle_data = cbdataReference(handle_data);
 350     } else {
 351         /* Detect if there is multiple concurrent users of this fd.. we only support one callback */
 352         assert(F->disk.wrt_handle_data == handle_data && F->disk.wrt_handle == handle);
 353     }
 354
 355     /* add to queue */
 356     if (F->disk.write_q == nullptr) {
 357         /* empty queue */
 358         F->disk.write_q = F->disk.write_q_tail = wq;
 359     } else {
 360         F->disk.write_q_tail->next = wq;
 361         F->disk.write_q_tail = wq;
 362     }
 363
 364     if (!F->flags.write_daemon) {
 365         diskHandleWrite(fd, nullptr);
 366     }
 367 }
 368
 369 /*
 370  * a wrapper around file_write to allow for MemBuf to be file_written
 371  * in a snap
 372  */
 373 void
 374 file_write_mbuf(int fd, off_t off, MemBuf mb, DWCB * handler, void *handler_data)
 375 {
 376     file_write(fd, off, mb.buf, mb.size, handler, handler_data, mb.freeFunc());
 377 }
 378
 379 /* Read from FD */
 380 static void
 381 diskHandleRead(int fd, void *data)
 382 {
 383     dread_ctrl *ctrl_dat = (dread_ctrl *)data;
 384     fde *F = &fd_table[fd];
 385     int len;
 386     int rc = DISK_OK;
 387     int xerrno;
 388
 389     /*
 390      * FD < 0 indicates premature close; we just have to free
 391      * the state data.
 392      */
 393
 394     if (fd < 0) {
 395         memFree(ctrl_dat, MEM_DREAD_CTRL);
 396         return;
 397     }
 398
 399 #if WRITES_MAINTAIN_DISK_OFFSET
 400     if (F->disk.offset != ctrl_dat->offset) {
 401 #else
 402     {
 403 #endif
 404         debugs(6, 3, "diskHandleRead: FD " << fd << " seeking to offset " << ctrl_dat->offset);
 405         errno = 0;
 406         if (lseek(fd, ctrl_dat->offset, SEEK_SET) == -1) {
 407             xerrno = errno;
 408             // shouldn't happen, let's detect that
 409             debugs(50, DBG_IMPORTANT, "ERROR: in seek for FD " << fd << ": " << xstrerr(xerrno));
 410             // XXX handle failures?
 411         }
 412         ++ statCounter.syscalls.disk.seeks;
 413         F->disk.offset = ctrl_dat->offset;
 414     }
 415
 416     errno = 0;
 417     len = FD_READ_METHOD(fd, ctrl_dat->buf, ctrl_dat->req_len);
 418     xerrno = errno;
 419
 420     if (len > 0)
 421         F->disk.offset += len;
 422
 423     ++ statCounter.syscalls.disk.reads;
 424
 425     fd_bytes(fd, len, FD_READ);
 426
 427     if (len < 0) {
 428         if (ignoreErrno(xerrno)) {
 429             Comm::SetSelect(fd, COMM_SELECT_READ, diskHandleRead, ctrl_dat, 0);
 430             return;
 431         }
 432
 433         debugs(50, DBG_IMPORTANT, "diskHandleRead: FD " << fd << ": " << xstrerr(xerrno));
 434         len = 0;
 435         rc = DISK_ERROR;
 436     } else if (len == 0) {
 437         rc = DISK_EOF;
 438     }
 439
 440     if (cbdataReferenceValid(ctrl_dat->client_data))
 441         ctrl_dat->handler(fd, ctrl_dat->buf, len, rc, ctrl_dat->client_data);
 442
 443     cbdataReferenceDone(ctrl_dat->client_data);
 444
 445     memFree(ctrl_dat, MEM_DREAD_CTRL);
 446 }
 447
 448 /* start read operation */
 449 /* buffer must be allocated from the caller.
 450  * It must have at least req_len space in there.
 451  * call handler when a reading is complete. */
 452 void
 453 file_read(int fd, char *buf, int req_len, off_t offset, DRCB * handler, void *client_data)
 454 {
 455     dread_ctrl *ctrl_dat;
 456     assert(fd >= 0);
 457     ctrl_dat = (dread_ctrl *)memAllocate(MEM_DREAD_CTRL);
 458     ctrl_dat->fd = fd;
 459     ctrl_dat->offset = offset;
 460     ctrl_dat->req_len = req_len;
 461     ctrl_dat->buf = buf;
 462     ctrl_dat->end_of_file = 0;
 463     ctrl_dat->handler = handler;
 464     ctrl_dat->client_data = cbdataReference(client_data);
 465     diskHandleRead(fd, ctrl_dat);
 466 }
 467
 468 void
 469 safeunlink(const char *s, int quiet)
 470 {
 471     ++ statCounter.syscalls.disk.unlinks;
 472
 473     if (unlink(s) < 0 && !quiet) {
 474         int xerrno = errno;
 475         debugs(50, DBG_IMPORTANT, "ERROR: safeunlink: Could not delete " << s << ": " << xstrerr(xerrno));
 476     }
 477 }
 478
 479 bool
 480 FileRename(const SBuf &from, const SBuf &to)
 481 {
 482     debugs(21, 2, "renaming " << from << " to " << to);
 483
 484     // non-const copy for c_str()
 485     SBuf from2(from);
 486     // ensure c_str() lifetimes even if `to` and `from` share memory
 487     SBuf to2(to.rawContent(), to.length());
 488
 489 #if _SQUID_OS2_ || _SQUID_WINDOWS_
 490     remove(to2.c_str());
 491 #endif
 492
 493     if (rename(from2.c_str(), to2.c_str()) == 0)
 494         return true;
 495
 496     int xerrno = errno;
 497     debugs(21, (xerrno == ENOENT ? 2 : DBG_IMPORTANT), "ERROR: Cannot rename " << from << " to " << to << ": " << xstrerr(xerrno));
 498
 499     return false;
 500 }
 501
 502 int
 503 fsBlockSize(const char *path, int *blksize)
 504 {
 505     struct statvfs sfs;
 506
 507     if (xstatvfs(path, &sfs)) {
 508         int xerrno = errno;
 509         debugs(50, DBG_IMPORTANT, "" << path << ": " << xstrerr(xerrno));
 510         *blksize = 2048;
 511         return 1;
 512     }
 513
 514     *blksize = (int) sfs.f_frsize;
 515
 516     // Sanity check; make sure we have a meaningful value.
 517     if (*blksize < 512)
 518         *blksize = 2048;
 519
 520     return 0;
 521 }
 522
 523 #define fsbtoblk(num, fsbs, bs) \
 524     (((fsbs) != 0 && (fsbs) < (bs)) ? \
 525             (num) / ((bs) / (fsbs)) : (num) * ((fsbs) / (bs)))
 526 int
 527 fsStats(const char *path, int *totl_kb, int *free_kb, int *totl_in, int *free_in)
 528 {
 529     struct statvfs sfs;
 530
 531     if (xstatvfs(path, &sfs)) {
 532         int xerrno = errno;
 533         debugs(50, DBG_IMPORTANT, "" << path << ": " << xstrerr(xerrno));
 534         return 1;
 535     }
 536
 537     *totl_kb = (int) fsbtoblk(sfs.f_blocks, sfs.f_frsize, 1024);
 538     *free_kb = (int) fsbtoblk(sfs.f_bfree, sfs.f_frsize, 1024);
 539     *totl_in = (int) sfs.f_files;
 540     *free_in = (int) sfs.f_ffree;
 541     return 0;
 542 }
 543