src/disk.cc

   1
   2 /*
   3  * $Id: disk.cc,v 1.173 2007/04/30 16:56:09 wessels Exp $
   4  *
   5  * DEBUG: section 6     Disk I/O Routines
   6  * AUTHOR: Harvest Derived
   7  *
   8  * SQUID Web Proxy Cache          http://www.squid-cache.org/
   9  * ----------------------------------------------------------
  10  *
  11  *  Squid is the result of efforts by numerous individuals from
  12  *  the Internet community; see the CONTRIBUTORS file for full
  13  *  details.   Many organizations have provided support for Squid's
  14  *  development; see the SPONSORS file for full details.  Squid is
  15  *  Copyrighted (C) 2001 by the Regents of the University of
  16  *  California; see the COPYRIGHT file for full details.  Squid
  17  *  incorporates software developed and/or copyrighted by other
  18  *  sources; see the CREDITS file for full details.
  19  *
  20  *  This program is free software; you can redistribute it and/or modify
  21  *  it under the terms of the GNU General Public License as published by
  22  *  the Free Software Foundation; either version 2 of the License, or
  23  *  (at your option) any later version.
  24  *
  25  *  This program is distributed in the hope that it will be useful,
  26  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  27  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  28  *  GNU General Public License for more details.
  29  *
  30  *  You should have received a copy of the GNU General Public License
  31  *  along with this program; if not, write to the Free Software
  32  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
  33  *
  34  */
  35
  36 #include "squid.h"
  37 #include "fde.h"
  38 #include "MemBuf.h"
  39
  40 static PF diskHandleRead;
  41 static PF diskHandleWrite;
  42
  43 #if defined(_SQUID_WIN32_) || defined(_SQUID_OS2_)
  44 static int
  45 diskWriteIsComplete(int fd)
  46 {
  47     return fd_table[fd].disk.write_q ? 0 : 1;
  48 }
  49
  50 #endif
  51
  52 void
  53 disk_init(void)
  54 {
  55     (void) 0;
  56 }
  57
  58 /*
  59  * opens a disk file specified by 'path'.  This function always
  60  * blocks!  There is no callback.
  61  */
  62 int
  63 file_open(const char *path, int mode)
  64 {
  65     int fd;
  66     PROF_start(file_open);
  67
  68     if (FILE_MODE(mode) == O_WRONLY)
  69         mode |= O_APPEND;
  70
  71     errno = 0;
  72
  73     fd = open(path, mode, 0644);
  74
  75     statCounter.syscalls.disk.opens++;
  76
  77     if (fd < 0) {
  78         debugs(50, 3, "file_open: error opening file " << path << ": " << xstrerror());
  79         fd = DISK_ERROR;
  80     } else {
  81         debugs(6, 5, "file_open: FD " << fd);
  82         commSetCloseOnExec(fd);
  83         fd_open(fd, FD_FILE, path);
  84     }
  85
  86     PROF_stop(file_open);
  87     return fd;
  88 }
  89
  90
  91 /* close a disk file. */
  92 void
  93 file_close(int fd)
  94 {
  95     fde *F = &fd_table[fd];
  96     PF *read_callback;
  97     PROF_start(file_close);
  98     assert(fd >= 0);
  99     assert(F->flags.open);
 100
 101     if ((read_callback = F->read_handler)) {
 102         F->read_handler = NULL;
 103         read_callback(-1, F->read_data);
 104     }
 105
 106     if (F->flags.write_daemon) {
 107 #if defined(_SQUID_WIN32_) || defined(_SQUID_OS2_)
 108         /*
 109          * on some operating systems, you can not delete or rename
 110          * open files, so we won't allow delayed close.
 111          */
 112
 113         while (!diskWriteIsComplete(fd))
 114             diskHandleWrite(fd, NULL);
 115
 116 #else
 117
 118         F->flags.close_request = 1;
 119
 120         debugs(6, 2, "file_close: FD " << fd << ", delaying close");
 121
 122         PROF_stop(file_close);
 123
 124         return;
 125
 126 #endif
 127
 128     }
 129
 130     /*
 131      * Assert there is no write callback.  Otherwise we might be
 132      * leaking write state data by closing the descriptor
 133      */
 134     assert(F->write_handler == NULL);
 135
 136     F->flags.closing = 1;
 137
 138 #if CALL_FSYNC_BEFORE_CLOSE
 139
 140     fsync(fd);
 141
 142 #endif
 143
 144     close(fd);
 145
 146     debugs(6, F->flags.close_request ? 2 : 5, "file_close: FD " << fd << " really closing\n");
 147
 148     fd_close(fd);
 149
 150     statCounter.syscalls.disk.closes++;
 151
 152     PROF_stop(file_close);
 153 }
 154
 155 /*
 156  * This function has the purpose of combining multiple writes.  This is
 157  * to facilitate the ASYNC_IO option since it can only guarantee 1
 158  * write to a file per trip around the comm.c select() loop. That's bad
 159  * because more than 1 write can be made to the access.log file per
 160  * trip, and so this code is purely designed to help batch multiple
 161  * sequential writes to the access.log file.  Squid will never issue
 162  * multiple writes for any other file type during 1 trip around the
 163  * select() loop.       --SLF
 164  */
 165 static void
 166
 167 diskCombineWrites(struct _fde_disk *fdd)
 168 {
 169     int len = 0;
 170     dwrite_q *q = NULL;
 171     dwrite_q *wq = NULL;
 172     /*
 173      * We need to combine multiple write requests on an FD's write
 174      * queue But only if we don't need to seek() in between them, ugh!
 175      * XXX This currently ignores any seeks (file_offset)
 176      */
 177
 178     if (fdd->write_q != NULL && fdd->write_q->next != NULL)
 179     {
 180         len = 0;
 181
 182         for (q = fdd->write_q; q != NULL; q = q->next)
 183             len += q->len - q->buf_offset;
 184
 185         wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
 186
 187         wq->buf = (char *)xmalloc(len);
 188
 189         wq->len = 0;
 190
 191         wq->buf_offset = 0;
 192
 193         wq->next = NULL;
 194
 195         wq->free_func = xfree;
 196
 197         do {
 198             q = fdd->write_q;
 199             len = q->len - q->buf_offset;
 200             xmemcpy(wq->buf + wq->len, q->buf + q->buf_offset, len);
 201             wq->len += len;
 202             fdd->write_q = q->next;
 203
 204             if (q->free_func)
 205                 (q->free_func) (q->buf);
 206
 207             if (q) {
 208                 memFree(q, MEM_DWRITE_Q);
 209                 q = NULL;
 210             }
 211         } while (fdd->write_q != NULL);
 212
 213         fdd->write_q_tail = wq;
 214
 215         fdd->write_q = wq;
 216     }
 217 }
 218
 219 /* write handler */
 220 static void
 221 diskHandleWrite(int fd, void *notused)
 222 {
 223     int len = 0;
 224     fde *F = &fd_table[fd];
 225
 226     struct _fde_disk *fdd = &F->disk;
 227     dwrite_q *q = fdd->write_q;
 228     int status = DISK_OK;
 229     int do_close;
 230
 231     if (NULL == q)
 232         return;
 233
 234     PROF_start(diskHandleWrite);
 235
 236     debugs(6, 3, "diskHandleWrite: FD " << fd);
 237
 238     F->flags.write_daemon = 0;
 239
 240     assert(fdd->write_q != NULL);
 241
 242     assert(fdd->write_q->len > fdd->write_q->buf_offset);
 243
 244     debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " << (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes");
 245
 246     errno = 0;
 247
 248     if (fdd->write_q->file_offset != -1)
 249         lseek(fd, fdd->write_q->file_offset, SEEK_SET);
 250
 251     len = FD_WRITE_METHOD(fd,
 252                           fdd->write_q->buf + fdd->write_q->buf_offset,
 253                           fdd->write_q->len - fdd->write_q->buf_offset);
 254
 255     debugs(6, 3, "diskHandleWrite: FD " << fd << " len = " << len);
 256
 257     statCounter.syscalls.disk.writes++;
 258
 259     fd_bytes(fd, len, FD_WRITE);
 260
 261     if (len < 0) {
 262         if (!ignoreErrno(errno)) {
 263             status = errno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR;
 264             debugs(50, 1, "diskHandleWrite: FD " << fd << ": disk write error: " << xstrerror());
 265
 266             /*
 267              * If there is no write callback, then this file is
 268              * most likely something important like a log file, or
 269              * an interprocess pipe.  Its not a swapfile.  We feel
 270              * that a write failure on a log file is rather important,
 271              * and Squid doesn't otherwise deal with this condition.
 272              * So to get the administrators attention, we exit with
 273              * a fatal message.
 274              */
 275
 276             if (fdd->wrt_handle == NULL)
 277                 fatal("Write failure -- check your disk space and cache.log");
 278
 279             /*
 280              * If there is a write failure, then we notify the
 281              * upper layer via the callback, at the end of this
 282              * function.  Meanwhile, flush all pending buffers
 283              * here.  Let the upper layer decide how to handle the
 284              * failure.  This will prevent experiencing multiple,
 285              * repeated write failures for the same FD because of
 286              * the queued data.
 287              */
 288             do {
 289                 fdd->write_q = q->next;
 290
 291                 if (q->free_func)
 292                     (q->free_func) (q->buf);
 293
 294                 if (q) {
 295                     memFree(q, MEM_DWRITE_Q);
 296                     q = NULL;
 297                 }
 298             } while ((q = fdd->write_q));
 299         }
 300
 301         len = 0;
 302     }
 303
 304     if (q != NULL) {
 305         /* q might become NULL from write failure above */
 306         q->buf_offset += len;
 307
 308         if (q->buf_offset > q->len)
 309             debugs(50, 1, "diskHandleWriteComplete: q->buf_offset > q->len (" <<
 310                    q << "," << (int) q->buf_offset << ", " << q->len << ", " <<
 311                    len << " FD " << fd << ")");
 312
 313
 314         assert(q->buf_offset <= q->len);
 315
 316         if (q->buf_offset == q->len) {
 317             /* complete write */
 318             fdd->write_q = q->next;
 319
 320             if (q->free_func)
 321                 (q->free_func) (q->buf);
 322
 323             if (q) {
 324                 memFree(q, MEM_DWRITE_Q);
 325                 q = NULL;
 326             }
 327         }
 328     }
 329
 330     if (fdd->write_q == NULL) {
 331         /* no more data */
 332         fdd->write_q_tail = NULL;
 333     } else {
 334         /* another block is queued */
 335         diskCombineWrites(fdd);
 336         commSetSelect(fd, COMM_SELECT_WRITE, diskHandleWrite, NULL, 0);
 337         F->flags.write_daemon = 1;
 338     }
 339
 340     do_close = F->flags.close_request;
 341
 342     if (fdd->wrt_handle) {
 343         DWCB *callback = fdd->wrt_handle;
 344         void *cbdata;
 345         fdd->wrt_handle = NULL;
 346
 347         if (cbdataReferenceValidDone(fdd->wrt_handle_data, &cbdata)) {
 348             callback(fd, status, len, cbdata);
 349             /*
 350              * NOTE, this callback can close the FD, so we must
 351              * not touch 'F', 'fdd', etc. after this.
 352              */
 353             PROF_stop(diskHandleWrite);
 354             return;
 355             /* XXX But what about close_request??? */
 356         }
 357     }
 358
 359     if (do_close)
 360         file_close(fd);
 361
 362     PROF_stop(diskHandleWrite);
 363 }
 364
 365
 366 /* write block to a file */
 367 /* write back queue. Only one writer at a time. */
 368 /* call a handle when writing is complete. */
 369 void
 370 file_write(int fd,
 371            off_t file_offset,
 372            void const *ptr_to_buf,
 373            int len,
 374            DWCB * handle,
 375            void *handle_data,
 376            FREE * free_func)
 377 {
 378     dwrite_q *wq = NULL;
 379     fde *F = &fd_table[fd];
 380     PROF_start(file_write);
 381     assert(fd >= 0);
 382     assert(F->flags.open);
 383     /* if we got here. Caller is eligible to write. */
 384     wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
 385     wq->file_offset = file_offset;
 386     wq->buf = (char *)ptr_to_buf;
 387     wq->len = len;
 388     wq->buf_offset = 0;
 389     wq->next = NULL;
 390     wq->free_func = free_func;
 391
 392     if (!F->disk.wrt_handle_data) {
 393         F->disk.wrt_handle = handle;
 394         F->disk.wrt_handle_data = cbdataReference(handle_data);
 395     } else {
 396         /* Detect if there is multiple concurrent users of this fd.. we only support one callback */
 397         assert(F->disk.wrt_handle_data == handle_data && F->disk.wrt_handle == handle);
 398     }
 399
 400     /* add to queue */
 401     if (F->disk.write_q == NULL) {
 402         /* empty queue */
 403         F->disk.write_q = F->disk.write_q_tail = wq;
 404     } else {
 405         F->disk.write_q_tail->next = wq;
 406         F->disk.write_q_tail = wq;
 407     }
 408
 409     if (!F->flags.write_daemon) {
 410         diskHandleWrite(fd, NULL);
 411     }
 412
 413     PROF_stop(file_write);
 414 }
 415
 416 /*
 417  * a wrapper around file_write to allow for MemBuf to be file_written
 418  * in a snap
 419  */
 420 void
 421 file_write_mbuf(int fd, off_t off, MemBuf mb, DWCB * handler, void *handler_data)
 422 {
 423     file_write(fd, off, mb.buf, mb.size, handler, handler_data, mb.freeFunc());
 424 }
 425
 426 /* Read from FD */
 427 static void
 428 diskHandleRead(int fd, void *data)
 429 {
 430     dread_ctrl *ctrl_dat = (dread_ctrl *)data;
 431     fde *F = &fd_table[fd];
 432     int len;
 433     int rc = DISK_OK;
 434     /*
 435      * FD < 0 indicates premature close; we just have to free
 436      * the state data.
 437      */
 438
 439     if (fd < 0) {
 440         memFree(ctrl_dat, MEM_DREAD_CTRL);
 441         return;
 442     }
 443
 444     PROF_start(diskHandleRead);
 445
 446     if (F->disk.offset != ctrl_dat->offset) {
 447         debugs(6, 3, "diskHandleRead: FD " << fd << " seeking to offset " << ctrl_dat->offset);
 448         lseek(fd, ctrl_dat->offset, SEEK_SET);  /* XXX ignore return? */
 449         statCounter.syscalls.disk.seeks++;
 450         F->disk.offset = ctrl_dat->offset;
 451     }
 452
 453     errno = 0;
 454     len = FD_READ_METHOD(fd, ctrl_dat->buf, ctrl_dat->req_len);
 455
 456     if (len > 0)
 457         F->disk.offset += len;
 458
 459     statCounter.syscalls.disk.reads++;
 460
 461     fd_bytes(fd, len, FD_READ);
 462
 463     if (len < 0) {
 464         if (ignoreErrno(errno)) {
 465             commSetSelect(fd, COMM_SELECT_READ, diskHandleRead, ctrl_dat, 0);
 466             PROF_stop(diskHandleRead);
 467             return;
 468         }
 469
 470         debugs(50, 1, "diskHandleRead: FD " << fd << ": " << xstrerror());
 471         len = 0;
 472         rc = DISK_ERROR;
 473     } else if (len == 0) {
 474         rc = DISK_EOF;
 475     }
 476
 477     if (cbdataReferenceValid(ctrl_dat->client_data))
 478         ctrl_dat->handler(fd, ctrl_dat->buf, len, rc, ctrl_dat->client_data);
 479
 480     cbdataReferenceDone(ctrl_dat->client_data);
 481
 482     memFree(ctrl_dat, MEM_DREAD_CTRL);
 483
 484     PROF_stop(diskHandleRead);
 485 }
 486
 487
 488 /* start read operation */
 489 /* buffer must be allocated from the caller.
 490  * It must have at least req_len space in there.
 491  * call handler when a reading is complete. */
 492 void
 493 file_read(int fd, char *buf, int req_len, off_t offset, DRCB * handler, void *client_data)
 494 {
 495     dread_ctrl *ctrl_dat;
 496     PROF_start(file_read);
 497     assert(fd >= 0);
 498     ctrl_dat = (dread_ctrl *)memAllocate(MEM_DREAD_CTRL);
 499     ctrl_dat->fd = fd;
 500     ctrl_dat->offset = offset;
 501     ctrl_dat->req_len = req_len;
 502     ctrl_dat->buf = buf;
 503     ctrl_dat->end_of_file = 0;
 504     ctrl_dat->handler = handler;
 505     ctrl_dat->client_data = cbdataReference(client_data);
 506     diskHandleRead(fd, ctrl_dat);
 507     PROF_stop(file_read);
 508 }
 509
 510 void
 511 safeunlink(const char *s, int quiet)
 512 {
 513     statCounter.syscalls.disk.unlinks++;
 514
 515     if (unlink(s) < 0 && !quiet)
 516         debugs(50, 1, "safeunlink: Couldn't delete " << s << ": " << xstrerror());
 517 }
 518
 519 /*
 520  * Same as rename(2) but complains if something goes wrong;
 521  * the caller is responsible for handing and explaining the
 522  * consequences of errors.
 523  */
 524 int
 525 xrename(const char *from, const char *to)
 526 {
 527     debugs(21, 2, "xrename: renaming " << from << " to " << to);
 528 #if defined (_SQUID_OS2_) || defined (_SQUID_WIN32_)
 529
 530     remove
 531         (to);
 532
 533 #endif
 534
 535     if (0 == rename(from, to))
 536         return 0;
 537
 538     debugs(21, errno == ENOENT ? 2 : 1, "xrename: Cannot rename " << from << " to " << to << ": " << xstrerror());
 539
 540     return -1;
 541 }
 542