src/DiskIO/DiskDaemon/DiskdIOStrategy.cc

   1 /*
   2  * DEBUG: section 79    Squid-side DISKD I/O functions.
   3  * AUTHOR: Duane Wessels
   4  *
   5  * SQUID Web Proxy Cache          http://www.squid-cache.org/
   6  * ----------------------------------------------------------
   7  *
   8  *  Squid is the result of efforts by numerous individuals from
   9  *  the Internet community; see the CONTRIBUTORS file for full
  10  *  details.   Many organizations have provided support for Squid's
  11  *  development; see the SPONSORS file for full details.  Squid is
  12  *  Copyrighted (C) 2001 by the Regents of the University of
  13  *  California; see the COPYRIGHT file for full details.  Squid
  14  *  incorporates software developed and/or copyrighted by other
  15  *  sources; see the CREDITS file for full details.
  16  *
  17  *  This program is free software; you can redistribute it and/or modify
  18  *  it under the terms of the GNU General Public License as published by
  19  *  the Free Software Foundation; either version 2 of the License, or
  20  *  (at your option) any later version.
  21  *
  22  *  This program is distributed in the hope that it will be useful,
  23  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  24  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  25  *  GNU General Public License for more details.
  26  *
  27  *  You should have received a copy of the GNU General Public License
  28  *  along with this program; if not, write to the Free Software
  29  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
  30  *
  31  * Copyright (c) 2003, Robert Collins <robertc@squid-cache.org>
  32  */
  33
  34 #include "squid.h"
  35 #include "comm/Loops.h"
  36 #include "ConfigOption.h"
  37 #include "DiskdIOStrategy.h"
  38 #include "DiskIO/DiskFile.h"
  39 #include "DiskdFile.h"
  40 #include "diomsg.h"
  41 #include "fd.h"
  42 #include "Store.h"
  43 #include "StatCounters.h"
  44 #include "SquidConfig.h"
  45 #include "SquidIpc.h"
  46 #include "SquidTime.h"
  47 #include "unlinkd.h"
  48
  49 #if HAVE_SYS_IPC_H
  50 #include <sys/ipc.h>
  51 #endif
  52 #if HAVE_SYS_MSG_H
  53 #include <sys/msg.h>
  54 #endif
  55 #if HAVE_SYS_SHM_H
  56 #include <sys/shm.h>
  57 #endif
  58 #if HAVE_ERRNO_H
  59 #include <errno.h>
  60 #endif
  61
  62 diskd_stats_t diskd_stats;
  63
  64 size_t DiskdIOStrategy::nextInstanceID (0);
  65 const int diomsg::msg_snd_rcv_sz = sizeof(diomsg) - sizeof(mtyp_t);
  66
  67 size_t
  68 DiskdIOStrategy::newInstance()
  69 {
  70     return ++nextInstanceID;
  71 }
  72
  73 bool
  74 DiskdIOStrategy::shedLoad()
  75 {
  76     /*
  77      * Fail on open() if there are too many requests queued.
  78      */
  79
  80     if (away > magic1) {
  81         debugs(79, 3, "storeDiskdIO::shedLoad: Shedding, too many requests away");
  82
  83         return true;
  84     }
  85
  86     return false;
  87 }
  88
  89 int
  90 DiskdIOStrategy::load()
  91 {
  92     /* Calculate the storedir load relative to magic2 on a scale of 0 .. 1000 */
  93     /* the parse function guarantees magic2 is positivie */
  94     return away * 1000 / magic2;
  95 }
  96
  97 void
  98 DiskdIOStrategy::openFailed()
  99 {
 100     ++diskd_stats.open_fail_queue_len;
 101 }
 102
 103 DiskFile::Pointer
 104 DiskdIOStrategy::newFile(char const *path)
 105 {
 106     if (shedLoad()) {
 107         openFailed();
 108         return NULL;
 109     }
 110
 111     return new DiskdFile (path, this);
 112 }
 113
 114 DiskdIOStrategy::DiskdIOStrategy() : magic1(64), magic2(72), away(0) , smsgid(-1), rmsgid(-1), wfd(-1) , instanceID(newInstance())
 115 {}
 116
 117 bool
 118 DiskdIOStrategy::unlinkdUseful() const
 119 {
 120     return true;
 121 }
 122
 123 void
 124 DiskdIOStrategy::unlinkFile(char const *path)
 125 {
 126     if (shedLoad()) {
 127         /* Damn, we need to issue a sync unlink here :( */
 128         debugs(79, 2, "storeDiskUnlink: Out of queue space, sync unlink");
 129         unlinkdUnlink(path);
 130         return;
 131     }
 132
 133     /* We can attempt a diskd unlink */
 134     int x;
 135
 136     ssize_t shm_offset;
 137
 138     char *buf;
 139
 140     buf = (char *)shm.get(&shm_offset);
 141
 142     xstrncpy(buf, path, SHMBUF_BLKSZ);
 143
 144     x = send(_MQD_UNLINK,
 145              0,
 146              (StoreIOState::Pointer )NULL,
 147              0,
 148              0,
 149              shm_offset);
 150
 151     if (x < 0) {
 152         debugs(79, DBG_IMPORTANT, "storeDiskdSend UNLINK: " << xstrerror());
 153         ::unlink(buf);          /* XXX EWW! */
 154         //        shm.put (shm_offset);
 155     }
 156
 157     ++diskd_stats.unlink.ops;
 158 }
 159
 160 void
 161 DiskdIOStrategy::init()
 162 {
 163     int pid;
 164     void * hIpc;
 165     int rfd;
 166     int ikey;
 167     const char *args[5];
 168     char skey1[32];
 169     char skey2[32];
 170     char skey3[32];
 171     Ip::Address localhost;
 172
 173     ikey = (getpid() << 10) + (instanceID << 2);
 174     ikey &= 0x7fffffff;
 175     smsgid = msgget((key_t) ikey, 0700 | IPC_CREAT);
 176
 177     if (smsgid < 0) {
 178         debugs(50, DBG_CRITICAL, "storeDiskdInit: msgget: " << xstrerror());
 179         fatal("msgget failed");
 180     }
 181
 182     rmsgid = msgget((key_t) (ikey + 1), 0700 | IPC_CREAT);
 183
 184     if (rmsgid < 0) {
 185         debugs(50, DBG_CRITICAL, "storeDiskdInit: msgget: " << xstrerror());
 186         fatal("msgget failed");
 187     }
 188
 189     shm.init(ikey, magic2);
 190     snprintf(skey1, 32, "%d", ikey);
 191     snprintf(skey2, 32, "%d", ikey + 1);
 192     snprintf(skey3, 32, "%d", ikey + 2);
 193     args[0] = "diskd";
 194     args[1] = skey1;
 195     args[2] = skey2;
 196     args[3] = skey3;
 197     args[4] = NULL;
 198     localhost.SetLocalhost();
 199     pid = ipcCreate(IPC_STREAM,
 200                     Config.Program.diskd,
 201                     args,
 202                     "diskd",
 203                     localhost,
 204                     &rfd,
 205                     &wfd,
 206                     &hIpc);
 207
 208     if (pid < 0)
 209         fatalf("execl: %s", Config.Program.diskd);
 210
 211     if (rfd != wfd)
 212         comm_close(rfd);
 213
 214     fd_note(wfd, "squid -> diskd");
 215
 216     commUnsetFdTimeout(wfd);
 217     commSetNonBlocking(wfd);
 218     Comm::QuickPollRequired();
 219 }
 220
 221 /*
 222  * SHM manipulation routines
 223  */
 224 void
 225 SharedMemory::put(ssize_t offset)
 226 {
 227     int i;
 228     assert(offset >= 0);
 229     assert(offset < nbufs * SHMBUF_BLKSZ);
 230     i = offset / SHMBUF_BLKSZ;
 231     assert(i < nbufs);
 232     assert(CBIT_TEST(inuse_map, i));
 233     CBIT_CLR(inuse_map, i);
 234     --diskd_stats.shmbuf_count;
 235 }
 236
 237 void *
 238
 239 SharedMemory::get(ssize_t * shm_offset)
 240 {
 241     char *aBuf = NULL;
 242     int i;
 243
 244     for (i = 0; i < nbufs; ++i) {
 245         if (CBIT_TEST(inuse_map, i))
 246             continue;
 247
 248         CBIT_SET(inuse_map, i);
 249
 250         *shm_offset = i * SHMBUF_BLKSZ;
 251
 252         aBuf = buf + (*shm_offset);
 253
 254         break;
 255     }
 256
 257     assert(aBuf);
 258     assert(aBuf >= buf);
 259     assert(aBuf < buf + (nbufs * SHMBUF_BLKSZ));
 260     ++diskd_stats.shmbuf_count;
 261
 262     if (diskd_stats.max_shmuse < diskd_stats.shmbuf_count)
 263         diskd_stats.max_shmuse = diskd_stats.shmbuf_count;
 264
 265     return aBuf;
 266 }
 267
 268 void
 269 SharedMemory::init(int ikey, int magic2)
 270 {
 271     nbufs = (int)(magic2 * 1.3);
 272     id = shmget((key_t) (ikey + 2),
 273                 nbufs * SHMBUF_BLKSZ, 0600 | IPC_CREAT);
 274
 275     if (id < 0) {
 276         debugs(50, DBG_CRITICAL, "storeDiskdInit: shmget: " << xstrerror());
 277         fatal("shmget failed");
 278     }
 279
 280     buf = (char *)shmat(id, NULL, 0);
 281
 282     if (buf == (void *) -1) {
 283         debugs(50, DBG_CRITICAL, "storeDiskdInit: shmat: " << xstrerror());
 284         fatal("shmat failed");
 285     }
 286
 287     inuse_map = (char *)xcalloc((nbufs + 7) / 8, 1);
 288     diskd_stats.shmbuf_count += nbufs;
 289
 290     for (int i = 0; i < nbufs; ++i) {
 291         CBIT_SET(inuse_map, i);
 292         put (i * SHMBUF_BLKSZ);
 293     }
 294 }
 295
 296 void
 297 DiskdIOStrategy::unlinkDone(diomsg * M)
 298 {
 299     debugs(79, 3, "storeDiskdUnlinkDone: file " << shm.buf + M->shm_offset << " status " << M->status);
 300     ++statCounter.syscalls.disk.unlinks;
 301
 302     if (M->status < 0)
 303         ++diskd_stats.unlink.fail;
 304     else
 305         ++diskd_stats.unlink.success;
 306 }
 307
 308 void
 309 DiskdIOStrategy::handle(diomsg * M)
 310 {
 311     if (!cbdataReferenceValid (M->callback_data)) {
 312         /* I.e. already closed file
 313          * - say when we have a error opening after
 314          *   a read was already queued
 315          */
 316         debugs(79, 3, "storeDiskdHandle: Invalid callback_data " << M->callback_data);
 317         cbdataReferenceDone (M->callback_data);
 318         return;
 319     }
 320
 321     /* set errno passed from diskd.  makes debugging more meaningful */
 322     if (M->status < 0)
 323         errno = -M->status;
 324
 325     if (M->newstyle) {
 326         DiskdFile *theFile = (DiskdFile *)M->callback_data;
 327         theFile->unlock();
 328         theFile->completed (M);
 329     } else
 330         switch (M->mtype) {
 331
 332         case _MQD_OPEN:
 333
 334         case _MQD_CREATE:
 335
 336         case _MQD_CLOSE:
 337
 338         case _MQD_READ:
 339
 340         case _MQD_WRITE:
 341             assert (0);
 342             break;
 343
 344         case _MQD_UNLINK:
 345             unlinkDone(M);
 346             break;
 347
 348         default:
 349             assert(0);
 350             break;
 351         }
 352
 353     cbdataReferenceDone (M->callback_data);
 354 }
 355
 356 int
 357 DiskdIOStrategy::send(int mtype, int id, DiskdFile *theFile, size_t size, off_t offset, ssize_t shm_offset, Lock *requestor)
 358 {
 359     diomsg M;
 360     M.callback_data = cbdataReference(theFile);
 361     theFile->lock();
 362     M.requestor = requestor;
 363     M.newstyle = true;
 364
 365     if (requestor)
 366         requestor->lock();
 367
 368     return SEND(&M, mtype, id, size, offset, shm_offset);
 369 }
 370
 371 int
 372 DiskdIOStrategy::send(int mtype, int id, RefCount<StoreIOState> sio, size_t size, off_t offset, ssize_t shm_offset)
 373 {
 374     diomsg M;
 375     M.callback_data = cbdataReference(sio.getRaw());
 376     M.newstyle = false;
 377
 378     return SEND(&M, mtype, id, size, offset, shm_offset);
 379 }
 380
 381 int
 382 DiskdIOStrategy::SEND(diomsg *M, int mtype, int id, size_t size, off_t offset, ssize_t shm_offset)
 383 {
 384     static int send_errors = 0;
 385     static int last_seq_no = 0;
 386     static int seq_no = 0;
 387     int x;
 388
 389     M->mtype = mtype;
 390     M->size = size;
 391     M->offset = offset;
 392     M->status = -1;
 393     M->shm_offset = (int) shm_offset;
 394     M->id = id;
 395     M->seq_no = ++seq_no;
 396
 397     if (M->seq_no < last_seq_no)
 398         debugs(79, DBG_IMPORTANT, "WARNING: sequencing out of order");
 399
 400     x = msgsnd(smsgid, M, diomsg::msg_snd_rcv_sz, IPC_NOWAIT);
 401
 402     last_seq_no = M->seq_no;
 403
 404     if (0 == x) {
 405         ++diskd_stats.sent_count;
 406         ++away;
 407     } else {
 408         debugs(79, DBG_IMPORTANT, "storeDiskdSend: msgsnd: " << xstrerror());
 409         cbdataReferenceDone(M->callback_data);
 410         assert(++send_errors < 100);
 411         if (shm_offset > -1)
 412             shm.put(shm_offset);
 413     }
 414
 415     /*
 416      * We have to drain the queue here if necessary.  If we don't,
 417      * then we can have a lot of messages in the queue (probably
 418      * up to 2*magic1) and we can run out of shared memory buffers.
 419      */
 420     /*
 421      * Note that we call Store::Root().callbackk (for all SDs), rather
 422      * than callback for just this SD, so that while
 423      * we're "blocking" on this SD we can also handle callbacks
 424      * from other SDs that might be ready.
 425      */
 426
 427     struct timeval delay = {0, 1};
 428
 429     while (away > magic2) {
 430         select(0, NULL, NULL, NULL, &delay);
 431         Store::Root().callback();
 432
 433         if (delay.tv_usec < 1000000)
 434             delay.tv_usec <<= 1;
 435     }
 436
 437     return x;
 438 }
 439
 440 ConfigOption *
 441 DiskdIOStrategy::getOptionTree() const
 442 {
 443     ConfigOptionVector *result = new ConfigOptionVector;
 444     result->options.push_back(new ConfigOptionAdapter<DiskdIOStrategy>(*const_cast<DiskdIOStrategy *>(this), &DiskdIOStrategy::optionQ1Parse, &DiskdIOStrategy::optionQ1Dump));
 445     result->options.push_back(new ConfigOptionAdapter<DiskdIOStrategy>(*const_cast<DiskdIOStrategy *>(this), &DiskdIOStrategy::optionQ2Parse, &DiskdIOStrategy::optionQ2Dump));
 446     return result;
 447 }
 448
 449 bool
 450 DiskdIOStrategy::optionQ1Parse(const char *name, const char *value, int isaReconfig)
 451 {
 452     if (strcmp(name, "Q1") != 0)
 453         return false;
 454
 455     int old_magic1 = magic1;
 456
 457     magic1 = atoi(value);
 458
 459     if (!isaReconfig)
 460         return true;
 461
 462     if (old_magic1 < magic1) {
 463         /*
 464         * This is because shm.nbufs is computed at startup, when
 465         * we call shmget().  We can't increase the Q1/Q2 parameters
 466         * beyond their initial values because then we might have
 467         * more "Q2 messages" than shared memory chunks, and this
 468         * will cause an assertion in storeDiskdShmGet().
 469         */
 470         /* TODO: have DiskdIO hold a link to the swapdir, to allow detailed reporting again */
 471         debugs(3, DBG_IMPORTANT, "WARNING: cannot increase cache_dir Q1 value while Squid is running.");
 472         magic1 = old_magic1;
 473         return true;
 474     }
 475
 476     if (old_magic1 != magic1)
 477         debugs(3, DBG_IMPORTANT, "cache_dir new Q1 value '" << magic1 << "'");
 478
 479     return true;
 480 }
 481
 482 void
 483 DiskdIOStrategy::optionQ1Dump(StoreEntry * e) const
 484 {
 485     storeAppendPrintf(e, " Q1=%d", magic1);
 486 }
 487
 488 bool
 489 DiskdIOStrategy::optionQ2Parse(const char *name, const char *value, int isaReconfig)
 490 {
 491     if (strcmp(name, "Q2") != 0)
 492         return false;
 493
 494     int old_magic2 = magic2;
 495
 496     magic2 = atoi(value);
 497
 498     if (!isaReconfig)
 499         return true;
 500
 501     if (old_magic2 < magic2) {
 502         /* See comments in Q1 function above */
 503         debugs(3, DBG_IMPORTANT, "WARNING: cannot increase cache_dir Q2 value while Squid is running.");
 504         magic2 = old_magic2;
 505         return true;
 506     }
 507
 508     if (old_magic2 != magic2)
 509         debugs(3, DBG_IMPORTANT, "cache_dir new Q2 value '" << magic2 << "'");
 510
 511     return true;
 512 }
 513
 514 void
 515 DiskdIOStrategy::optionQ2Dump(StoreEntry * e) const
 516 {
 517     storeAppendPrintf(e, " Q2=%d", magic2);
 518 }
 519
 520 /*
 521  * Sync any pending data. We just sit around and read the queue
 522  * until the data has finished writing.
 523  */
 524 void
 525 DiskdIOStrategy::sync()
 526 {
 527     static time_t lastmsg = 0;
 528
 529     while (away > 0) {
 530         if (squid_curtime > lastmsg) {
 531             debugs(47, DBG_IMPORTANT, "storeDiskdDirSync: " << away << " messages away");
 532             lastmsg = squid_curtime;
 533         }
 534
 535         callback();
 536     }
 537 }
 538
 539 /*
 540  * Handle callbacks. If we have more than magic2 requests away, we block
 541  * until the queue is below magic2. Otherwise, we simply return when we
 542  * don't get a message.
 543  */
 544
 545 int
 546 DiskdIOStrategy::callback()
 547 {
 548     diomsg M;
 549     int x;
 550     int retval = 0;
 551
 552     if (away >= magic2) {
 553         ++diskd_stats.block_queue_len;
 554         retval = 1;
 555         /* We might not have anything to do, but our queue
 556          * is full.. */
 557     }
 558
 559     if (diskd_stats.sent_count - diskd_stats.recv_count >
 560             diskd_stats.max_away) {
 561         diskd_stats.max_away = diskd_stats.sent_count - diskd_stats.recv_count;
 562     }
 563
 564     while (1) {
 565 #ifdef  ALWAYS_ZERO_BUFFERS
 566         memset(&M, '\0', sizeof(M));
 567 #endif
 568
 569         x = msgrcv(rmsgid, &M, diomsg::msg_snd_rcv_sz, 0, IPC_NOWAIT);
 570
 571         if (x < 0)
 572             break;
 573         else if (x != diomsg::msg_snd_rcv_sz) {
 574             debugs(47, DBG_IMPORTANT, "storeDiskdDirCallback: msgget returns " << x);
 575             break;
 576         }
 577
 578         ++diskd_stats.recv_count;
 579         --away;
 580         handle(&M);
 581         retval = 1;             /* Return that we've actually done some work */
 582
 583         if (M.shm_offset > -1)
 584             shm.put ((off_t) M.shm_offset);
 585     }
 586
 587     return retval;
 588 }
 589
 590 void
 591 DiskdIOStrategy::statfs(StoreEntry & sentry)const
 592 {
 593     storeAppendPrintf(&sentry, "Pending operations: %d\n", away);
 594 }