[thirdparty/squid.git] / src / DiskIO / DiskDaemon / DiskdIOStrategy.cc

/*
 * $Id$
 *
 * DEBUG: section 79    Squid-side DISKD I/O functions.
 * AUTHOR: Duane Wessels
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.   Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 * Copyright (c) 2003, Robert Collins <robertc@squid-cache.org>
 */

#include "squid.h"
#include "comm/Loops.h"
#include "ConfigOption.h"
#include "DiskdIOStrategy.h"
#include "DiskIO/DiskFile.h"
#include "DiskdFile.h"
#include "diomsg.h"
#include "protos.h"
#include "Store.h"
#include "StatCounters.h"
#include "SquidTime.h"

#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/shm.h>
#if HAVE_ERRNO_H
#include <errno.h>
#endif

diskd_stats_t diskd_stats;

size_t DiskdIOStrategy::nextInstanceID (0);
const int diomsg::msg_snd_rcv_sz = sizeof(diomsg) - sizeof(mtyp_t);

/*
 * Hands out the next instance number: advances the class-wide
 * nextInstanceID counter and returns the advanced value.  The counter
 * starts at zero, so the first caller receives 1.
 */
size_t
DiskdIOStrategy::newInstance()
{
    nextInstanceID += 1;
    return nextInstanceID;
}

/*
 * Tells the caller whether new work should be refused: true when the
 * number of requests still away exceeds magic1 (the Q1 setting).
 * Used to fail open() and to bypass diskd for unlinks.
 */
bool
DiskdIOStrategy::shedLoad()
{
    const bool overloaded = (away > magic1);

    if (overloaded) {
        debugs(79, 3, "storeDiskdIO::shedLoad: Shedding, too many requests away");
    }

    return overloaded;
}

/*
 * Reports the storedir load: the number of requests away, scaled by
 * 1000 and divided by magic2 (the Q2 setting), so a queue holding
 * exactly magic2 requests reads as 1000.
 *
 * NOTE(review): the division relies on magic2 being positive; this
 * function does not check it -- confirm the option parser enforces it.
 */
int
DiskdIOStrategy::load()
{
    return (1000 * away) / magic2;
}

/*
 * Records one more open that was refused; newFile() calls this when
 * shedLoad() reports that the queue is too long.
 */
void
DiskdIOStrategy::openFailed()
{
    diskd_stats.open_fail_queue_len += 1;
}

/*
 * Creates a DiskdFile for 'path' bound to this strategy.
 * Returns NULL (after counting the failure via openFailed()) when
 * shedLoad() says too many requests are already away.
 */
DiskFile::Pointer
DiskdIOStrategy::newFile(char const *path)
{
    if (!shedLoad())
        return new DiskdFile (path, this);

    /* too busy: account for the refusal and hand back nothing */
    openFailed();
    return NULL;
}

/*
 * Starts with magic1 (Q1) = 64 and magic2 (Q2) = 72, nothing away, no
 * message queues or pipe descriptor yet (-1), and a freshly allocated
 * instance number from newInstance().
 */
DiskdIOStrategy::DiskdIOStrategy() :
        magic1(64),
        magic2(72),
        away(0),
        smsgid(-1),
        rmsgid(-1),
        wfd(-1),
        instanceID(newInstance())
{}

/*
 * Always answers true.
 * NOTE(review): presumably this tells callers that the unlinkd helper is
 * worth running for this strategy (unlinkFile() falls back to
 * unlinkdUnlink() when USE_UNLINKD is set) -- confirm against the caller.
 */
bool
DiskdIOStrategy::unlinkdUseful() const
{
    return true;
}

/*
 * Removes the file named by 'path', preferring an asynchronous request
 * to diskd.
 *
 * When shedLoad() reports that the queue is too long, the file is
 * removed immediately instead (through unlinkdUnlink() when USE_UNLINKD
 * is set, otherwise with unlink()) and diskd_stats.unlink.ops is left
 * untouched.
 *
 * Otherwise the path is copied into a shared memory buffer and an
 * _MQD_UNLINK message is queued.  If queueing fails, the file is removed
 * synchronously as a fallback.  diskd_stats.unlink.ops is incremented in
 * both of these cases.
 *
 * path: name of the file to remove.  At most SHMBUF_BLKSZ bytes are
 *       passed to xstrncpy() for the diskd request.
 */
void
DiskdIOStrategy::unlinkFile(char const *path)
{
    if (shedLoad()) {
        /* Damn, we need to issue a sync unlink here :( */
        debugs(79, 2, "storeDiskUnlink: Out of queue space, sync unlink");
#if USE_UNLINKD

        unlinkdUnlink(path);
#else

        unlink(path);
#endif

        return;
    }

    /* We can attempt a diskd unlink */
    ssize_t shm_offset;

    char *buf = (char *)shm.get(&shm_offset);

    xstrncpy(buf, path, SHMBUF_BLKSZ);

    const int x = send(_MQD_UNLINK,
                       0,
                       (StoreIOState::Pointer )NULL,
                       0,
                       0,
                       shm_offset);

    if (x < 0) {
        debugs(79, DBG_IMPORTANT, "storeDiskdSend UNLINK: " << xstrerror());
        /*
         * SEND() has already returned the shared buffer with shm.put()
         * when msgsnd() failed, so it must not be put again here and its
         * contents should no longer be relied upon.  Fall back to a
         * synchronous unlink using the caller's own string.
         */
        ::unlink(path);
    }

    ++diskd_stats.unlink.ops;
}

/*
 * Sets up the IPC channels and launches the diskd helper:
 *  - builds a base key from the process id and this instance's number,
 *  - creates the two message queues (base key and base key + 1),
 *  - initialises the shared memory pool (which uses base key + 2),
 *  - starts Config.Program.diskd with the three keys as arguments.
 * Any failure along the way is fatal.
 */
void
DiskdIOStrategy::init()
{
    Ip::Address localhost;

    /* mask off the top bit so the key stays non-negative */
    int ikey = (getpid() << 10) + (instanceID << 2);
    ikey &= 0x7fffffff;

    smsgid = msgget((key_t) ikey, 0700 | IPC_CREAT);

    if (smsgid < 0) {
        debugs(50, DBG_CRITICAL, "storeDiskdInit: msgget: " << xstrerror());
        fatal("msgget failed");
    }

    rmsgid = msgget((key_t) (ikey + 1), 0700 | IPC_CREAT);

    if (rmsgid < 0) {
        debugs(50, DBG_CRITICAL, "storeDiskdInit: msgget: " << xstrerror());
        fatal("msgget failed");
    }

    shm.init(ikey, magic2);

    /* the helper receives the three keys as decimal strings */
    char skey1[32];
    char skey2[32];
    char skey3[32];
    snprintf(skey1, sizeof(skey1), "%d", ikey);
    snprintf(skey2, sizeof(skey2), "%d", ikey + 1);
    snprintf(skey3, sizeof(skey3), "%d", ikey + 2);

    const char *args[5] = { "diskd", skey1, skey2, skey3, NULL };

    localhost.SetLocalhost();

    int rfd;
    void *hIpc;
    const int pid = ipcCreate(IPC_STREAM,
                              Config.Program.diskd,
                              args,
                              "diskd",
                              localhost,
                              &rfd,
                              &wfd,
                              &hIpc);

    if (pid < 0)
        fatalf("execl: %s", Config.Program.diskd);

    /* only the write side is kept; drop a distinct read descriptor */
    if (rfd != wfd)
        comm_close(rfd);

    fd_note(wfd, "squid -> diskd");

    commUnsetFdTimeout(wfd);
    commSetNonBlocking(wfd);
    Comm::QuickPollRequired();
}

/*
 * SHM manipulation routines
 */
/*
 * Returns the buffer that starts at byte 'offset' of the shared segment
 * to the pool: clears its bit in inuse_map and decrements
 * diskd_stats.shmbuf_count.  The offset must lie inside the segment and
 * the buffer must currently be marked in use (both are asserted).
 */
void
SharedMemory::put(ssize_t offset)
{
    assert(offset >= 0);
    assert(offset < nbufs * SHMBUF_BLKSZ);

    const int slot = offset / SHMBUF_BLKSZ;
    assert(slot < nbufs);
    assert(CBIT_TEST(inuse_map, slot));

    CBIT_CLR(inuse_map, slot);
    --diskd_stats.shmbuf_count;
}

/*
 * Claims the lowest-numbered free buffer of the shared segment.
 * Marks it in inuse_map, stores its byte offset in *shm_offset, bumps
 * diskd_stats.shmbuf_count (and the max_shmuse high-water mark) and
 * returns its address.  Asserts if every buffer is already in use.
 */
void *

SharedMemory::get(ssize_t * shm_offset)
{
    char *claimed = NULL;

    /* skip over the buffers that are already taken */
    int slot = 0;

    while (slot < nbufs && CBIT_TEST(inuse_map, slot))
        ++slot;

    if (slot < nbufs) {
        CBIT_SET(inuse_map, slot);

        *shm_offset = slot * SHMBUF_BLKSZ;

        claimed = buf + (*shm_offset);
    }

    assert(claimed);
    assert(claimed >= buf);
    assert(claimed < buf + (nbufs * SHMBUF_BLKSZ));

    ++diskd_stats.shmbuf_count;

    if (diskd_stats.shmbuf_count > diskd_stats.max_shmuse)
        diskd_stats.max_shmuse = diskd_stats.shmbuf_count;

    return claimed;
}

/*
 * Creates and attaches the shared memory segment used to pass data to
 * diskd and prepares the in-use bitmap.
 *
 * ikey:   base IPC key; the segment itself uses ikey + 2.
 * magic2: the Q2 setting; the pool holds (int)(magic2 * 1.3) buffers of
 *         SHMBUF_BLKSZ bytes each.
 *
 * A failing shmget() or shmat() is fatal.
 */
void
SharedMemory::init(int ikey, int magic2)
{
    nbufs = (int)(magic2 * 1.3);

    const key_t segmentKey = (key_t) (ikey + 2);
    id = shmget(segmentKey, nbufs * SHMBUF_BLKSZ, 0600 | IPC_CREAT);

    if (id < 0) {
        debugs(50, DBG_CRITICAL, "storeDiskdInit: shmget: " << xstrerror());
        fatal("shmget failed");
    }

    void *attached = shmat(id, NULL, 0);
    buf = (char *)attached;

    if (attached == (void *) -1) {
        debugs(50, DBG_CRITICAL, "storeDiskdInit: shmat: " << xstrerror());
        fatal("shmat failed");
    }

    /* one bit per buffer, rounded up to whole bytes */
    inuse_map = (char *)xcalloc((nbufs + 7) / 8, 1);

    /*
     * Pre-charge the usage counter, then mark and release every buffer
     * through put(); each put() decrements the counter again, so it ends
     * up where it started and every bitmap bit ends up clear.
     */
    diskd_stats.shmbuf_count += nbufs;

    for (int slot = 0; slot < nbufs; ++slot) {
        CBIT_SET(inuse_map, slot);
        put (slot * SHMBUF_BLKSZ);
    }
}

/*
 * Accounts for a completed diskd unlink: logs the file name found in the
 * shared buffer at M->shm_offset, counts the unlink syscall and records
 * the outcome as success (status >= 0) or failure (status < 0).
 */
void
DiskdIOStrategy::unlinkDone(diomsg * M)
{
    debugs(79, 3, "storeDiskdUnlinkDone: file " << shm.buf + M->shm_offset << " status " << M->status);
    ++statCounter.syscalls.disk.unlinks;

    if (M->status >= 0)
        ++diskd_stats.unlink.success;
    else
        ++diskd_stats.unlink.fail;
}

/*
 * Dispatches one reply received from diskd.
 *
 * If the callback data attached to the message is no longer valid the
 * reply is dropped after releasing the cbdata reference.  Otherwise:
 *  - a negative status is mirrored into errno,
 *  - "newstyle" replies are handed to the DiskdFile stored in
 *    callback_data (after dropping the reference count taken when the
 *    request was sent),
 *  - old style replies are only expected for _MQD_UNLINK; any other
 *    message type trips an assertion.
 * The cbdata reference is released before returning.
 */
void
DiskdIOStrategy::handle(diomsg * M)
{
    if (!cbdataReferenceValid (M->callback_data)) {
        /* I.e. already closed file
         * - say when we have a error opening after
         *   a read was already queued
         */
        debugs(79, 3, "storeDiskdHandle: Invalid callback_data " << M->callback_data);
        cbdataReferenceDone (M->callback_data);
        return;
    }

    /* set errno passed from diskd.  makes debugging more meaningful */
    if (M->status < 0)
        errno = -M->status;

    if (!M->newstyle) {
        /* the only old style request still in use is the unlink */
        if (M->mtype == _MQD_UNLINK) {
            unlinkDone(M);
        } else {
            assert(0);
        }
    } else {
        DiskdFile *theFile = (DiskdFile *)M->callback_data;
        theFile->RefCountDereference();
        theFile->completed (M);
    }

    cbdataReferenceDone (M->callback_data);
}

int
DiskdIOStrategy::send(int mtype, int id, DiskdFile *theFile, size_t size, off_t offset, ssize_t shm_offset, RefCountable_ *requestor)
{
    diomsg M;
    M.callback_data = cbdataReference(theFile);
    theFile->RefCountReference();
    M.requestor = requestor;
    M.newstyle = true;

    if (requestor)
        requestor->RefCountReference();

    return SEND(&M, mtype, id, size, offset, shm_offset);
}

/*
 * Queues an old style request whose callback data is a StoreIOState
 * (unlinkFile() uses this overload with a NULL sio).
 *
 * Takes a cbdata reference on the raw sio pointer, marks the message as
 * not "newstyle" and passes it to SEND().  Returns whatever SEND()
 * returns.
 */
int
DiskdIOStrategy::send(int mtype, int id, RefCount<StoreIOState> sio, size_t size, off_t offset, ssize_t shm_offset)
{
    diomsg M;
    M.callback_data = cbdataReference(sio.getRaw());
    /* there is no requestor on this path; do not leave the field unset
     * in a message that is about to be copied into the queue */
    M.requestor = NULL;
    M.newstyle = false;

    return SEND(&M, mtype, id, size, offset, shm_offset);
}

/*
 * Fills in the remaining fields of *M and places it on the send queue
 * without blocking (IPC_NOWAIT).
 *
 * On success the sent counter and the number of requests away grow by
 * one.  On failure the error is logged, the cbdata reference held by the
 * message is released, the failure is counted (100 failures trip an
 * assertion) and, if a shared memory buffer was attached
 * (shm_offset > -1), that buffer is returned to the pool.
 *
 * Afterwards, while more than magic2 requests are away, replies are
 * drained with an exponentially growing pause between polls.
 *
 * Returns the msgsnd() result: 0 on success, negative on failure.
 */
int
DiskdIOStrategy::SEND(diomsg *M, int mtype, int id, size_t size, off_t offset, ssize_t shm_offset)
{
    static int send_errors = 0;
    static int last_seq_no = 0;
    static int seq_no = 0;
    int x;

    M->mtype = mtype;
    M->size = size;
    M->offset = offset;
    M->status = -1;
    M->shm_offset = (int) shm_offset;
    M->id = id;
    M->seq_no = ++seq_no;

    if (M->seq_no < last_seq_no)
        debugs(79, DBG_IMPORTANT, "WARNING: sequencing out of order");

    x = msgsnd(smsgid, M, diomsg::msg_snd_rcv_sz, IPC_NOWAIT);

    last_seq_no = M->seq_no;

    if (0 == x) {
        ++diskd_stats.sent_count;
        ++away;
    } else {
        debugs(79, DBG_IMPORTANT, "storeDiskdSend: msgsnd: " << xstrerror());
        cbdataReferenceDone(M->callback_data);
        /* count outside the assertion so that the tally does not depend
         * on assertions being compiled in */
        ++send_errors;
        assert(send_errors < 100);
        if (shm_offset > -1)
            shm.put(shm_offset);
    }

    /*
     * We have to drain the queue here if necessary.  If we don't,
     * then we can have a lot of messages in the queue (probably
     * up to 2*magic1) and we can run out of shared memory buffers.
     */
    /*
     * Note that we call Store::Root().callback() (for all SDs), rather
     * than callback for just this SD, so that while
     * we're "blocking" on this SD we can also handle callbacks
     * from other SDs that might be ready.
     */

    /* pause before the next poll, in microseconds; doubles each round
     * and is capped just below one second so tv_usec stays in range */
    long pause_usec = 1;

    while (away > magic2) {
        /*
         * select() is allowed to modify the timeout it is given (Linux
         * stores the time not slept), so the timeval is rebuilt from
         * pause_usec on every round instead of being reused.
         */
        struct timeval delay;
        delay.tv_sec = 0;
        delay.tv_usec = pause_usec;

        select(0, NULL, NULL, NULL, &delay);
        Store::Root().callback();

        if (pause_usec < 999999) {
            pause_usec <<= 1;

            if (pause_usec > 999999)
                pause_usec = 999999;
        }
    }

    return x;
}

/*
 * Builds the option handlers for this strategy: a newly allocated
 * ConfigOptionVector holding one adapter for Q1 and one for Q2, each
 * wired to the matching parse and dump member functions.
 */
ConfigOption *
DiskdIOStrategy::getOptionTree() const
{
    /* the adapters need a mutable object because parsing updates it */
    DiskdIOStrategy &self = *const_cast<DiskdIOStrategy *>(this);

    ConfigOptionVector *tree = new ConfigOptionVector;

    tree->options.push_back(
        new ConfigOptionAdapter<DiskdIOStrategy>(self, &DiskdIOStrategy::optionQ1Parse, &DiskdIOStrategy::optionQ1Dump));
    tree->options.push_back(
        new ConfigOptionAdapter<DiskdIOStrategy>(self, &DiskdIOStrategy::optionQ2Parse, &DiskdIOStrategy::optionQ2Dump));

    return tree;
}

bool
DiskdIOStrategy::optionQ1Parse(const char *name, const char *value, int isaReconfig)
{
    if (strcmp(name, "Q1") != 0)
        return false;

    int old_magic1 = magic1;

    magic1 = atoi(value);

    if (!isaReconfig)
        return true;

    if (old_magic1 < magic1) {
        /*
        * This is because shm.nbufs is computed at startup, when
        * we call shmget().  We can't increase the Q1/Q2 parameters
        * beyond their initial values because then we might have
        * more "Q2 messages" than shared memory chunks, and this
        * will cause an assertion in storeDiskdShmGet().
        */
        /* TODO: have DiskdIO hold a link to the swapdir, to allow detailed reporting again */
        debugs(3, DBG_IMPORTANT, "WARNING: cannot increase cache_dir Q1 value while Squid is running.");
        magic1 = old_magic1;
        return true;
    }

    if (old_magic1 != magic1)
        debugs(3, DBG_IMPORTANT, "cache_dir new Q1 value '" << magic1 << "'");

    return true;
}

/*
 * Appends the current Q1 setting to 'e' in the form " Q1=<magic1>".
 */
void
DiskdIOStrategy::optionQ1Dump(StoreEntry * e) const
{
    const int q1 = magic1;
    storeAppendPrintf(e, " Q1=%d", q1);
}

bool
DiskdIOStrategy::optionQ2Parse(const char *name, const char *value, int isaReconfig)
{
    if (strcmp(name, "Q2") != 0)
        return false;

    int old_magic2 = magic2;

    magic2 = atoi(value);

    if (!isaReconfig)
        return true;

    if (old_magic2 < magic2) {
        /* See comments in Q1 function above */
        debugs(3, DBG_IMPORTANT, "WARNING: cannot increase cache_dir Q2 value while Squid is running.");
        magic2 = old_magic2;
        return true;
    }

    if (old_magic2 != magic2)
        debugs(3, DBG_IMPORTANT, "cache_dir new Q2 value '" << magic2 << "'");

    return true;
}

/*
 * Appends the current Q2 setting to 'e' in the form " Q2=<magic2>".
 */
void
DiskdIOStrategy::optionQ2Dump(StoreEntry * e) const
{
    const int q2 = magic2;
    storeAppendPrintf(e, " Q2=%d", q2);
}

/*
 * Wait for every outstanding request to complete.  Keeps polling the
 * reply queue through callback() until nothing is away any more, and
 * reports the number of messages still away at most once per
 * squid_curtime tick.  callback() does not wait for replies, so this
 * loop polls continuously.
 */
void
DiskdIOStrategy::sync()
{
    /* shared by all instances: time of the most recent progress report */
    static time_t lastmsg = 0;

    for (; away > 0; callback()) {
        if (squid_curtime > lastmsg) {
            debugs(47, DBG_IMPORTANT, "storeDiskdDirSync: " << away << " messages away");
            lastmsg = squid_curtime;
        }
    }
}

/*
 * Handle callbacks.  Collects every reply currently waiting on the
 * receive queue and dispatches each one through handle().  The queue is
 * read with IPC_NOWAIT, so this never waits: it returns as soon as no
 * (complete) message is available.
 *
 * Returns 1 when at least one reply was handled, or when the number of
 * requests away had already reached magic2 on entry (which is also
 * counted in diskd_stats.block_queue_len); returns 0 otherwise.
 */

int
DiskdIOStrategy::callback()
{
    diomsg M;
    int x;
    int retval = 0;

    if (away >= magic2) {
        ++diskd_stats.block_queue_len;
        retval = 1;
        /* We might not have anything to do, but our queue
         * is full.. */
    }

    /* keep the high-water mark of sent-but-unanswered requests */
    if (diskd_stats.sent_count - diskd_stats.recv_count >
            diskd_stats.max_away) {
        diskd_stats.max_away = diskd_stats.sent_count - diskd_stats.recv_count;
    }

    while (1) {
#ifdef	ALWAYS_ZERO_BUFFERS
        memset(&M, '\0', sizeof(M));
#endif

        x = msgrcv(rmsgid, &M, diomsg::msg_snd_rcv_sz, 0, IPC_NOWAIT);

        if (x < 0)
            break;
        else if (x != diomsg::msg_snd_rcv_sz) {
            /* a reply of the wrong size cannot be interpreted */
            debugs(47, DBG_IMPORTANT, "storeDiskdDirCallback: msgrcv returns " << x);
            break;
        }

        ++diskd_stats.recv_count;
        --away;
        handle(&M);
        retval = 1;		/* Return that we've actually done some work */

        /* the request carried a shared buffer; it is free again now */
        if (M.shm_offset > -1)
            shm.put ((off_t) M.shm_offset);
    }

    return retval;
}

/*
 * Appends a one-line report of the number of requests currently away
 * ("Pending operations") to 'sentry'.
 */
void
DiskdIOStrategy::statfs(StoreEntry & sentry)const
{
    StoreEntry *out = &sentry;
    storeAppendPrintf(out, "Pending operations: %d\n", away);
}
Commit	Line	Data
b9ae18aa	1	/*
262a0e14	2	* $Id$
b9ae18aa	3	*
	4	* DEBUG: section 79 Squid-side DISKD I/O functions.
	5	* AUTHOR: Duane Wessels
	6	*
	7	* SQUID Web Proxy Cache http://www.squid-cache.org/
	8	* ----------------------------------------------------------
	9	*
	10	* Squid is the result of efforts by numerous individuals from
	11	* the Internet community; see the CONTRIBUTORS file for full
	12	* details. Many organizations have provided support for Squid's
	13	* development; see the SPONSORS file for full details. Squid is
	14	* Copyrighted (C) 2001 by the Regents of the University of
	15	* California; see the COPYRIGHT file for full details. Squid
	16	* incorporates software developed and/or copyrighted by other
	17	* sources; see the CREDITS file for full details.
	18	*
	19	* This program is free software; you can redistribute it and/or modify
	20	* it under the terms of the GNU General Public License as published by
	21	* the Free Software Foundation; either version 2 of the License, or
	22	* (at your option) any later version.
26ac0430	23	*
b9ae18aa	24	* This program is distributed in the hope that it will be useful,
	25	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	26	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	27	* GNU General Public License for more details.
26ac0430	28	*
b9ae18aa	29	* You should have received a copy of the GNU General Public License
	30	* along with this program; if not, write to the Free Software
	31	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
	32	*
	33	* Copyright (c) 2003, Robert Collins <robertc@squid-cache.org>
	34	*/
	35
582c2af2	36	#include "squid.h"
d841c88d	37	#include "comm/Loops.h"
b9ae18aa	38	#include "ConfigOption.h"
582c2af2	39	#include "DiskdIOStrategy.h"
b9ae18aa	40	#include "DiskIO/DiskFile.h"
	41	#include "DiskdFile.h"
	42	#include "diomsg.h"
582c2af2	43	#include "protos.h"
b9ae18aa	44	#include "Store.h"
e4f1fdae	45	#include "StatCounters.h"
985c86bc	46	#include "SquidTime.h"
b9ae18aa	47
582c2af2 FC	48	#include <sys/ipc.h>
	49	#include <sys/msg.h>
	50	#include <sys/shm.h>
21d845b1 FC	51	#if HAVE_ERRNO_H
	52	#include <errno.h>
	53	#endif
582c2af2	54
b9ae18aa	55	diskd_stats_t diskd_stats;
	56
	57	size_t DiskdIOStrategy::nextInstanceID (0);
	58	const int diomsg::msg_snd_rcv_sz = sizeof(diomsg) - sizeof(mtyp_t);
	59
	60	size_t
	61	DiskdIOStrategy::newInstance()
	62	{
	63	return ++nextInstanceID;
	64	}
	65
	66	bool
	67	DiskdIOStrategy::shedLoad()
	68	{
	69	/*
	70	* Fail on open() if there are too many requests queued.
	71	*/
	72
	73	if (away > magic1) {
bf8fe701	74	debugs(79, 3, "storeDiskdIO::shedLoad: Shedding, too many requests away");
b9ae18aa	75
	76	return true;
	77	}
	78
	79	return false;
	80	}
	81
	82	int
	83	DiskdIOStrategy::load()
	84	{
	85	/* Calculate the storedir load relative to magic2 on a scale of 0 .. 1000 */
	86	/* the parse function guarantees magic2 is positivie */
	87	return away * 1000 / magic2;
	88	}
	89
	90	void
	91	DiskdIOStrategy::openFailed()
	92	{
cb4185f1	93	++diskd_stats.open_fail_queue_len;
b9ae18aa	94	}
	95
	96	DiskFile::Pointer
	97	DiskdIOStrategy::newFile(char const *path)
	98	{
	99	if (shedLoad()) {
	100	openFailed();
	101	return NULL;
	102	}
	103
	104	return new DiskdFile (path, this);
	105	}
	106
	107	DiskdIOStrategy::DiskdIOStrategy() : magic1(64), magic2(72), away(0) , smsgid(-1), rmsgid(-1), wfd(-1) , instanceID(newInstance())
	108	{}
	109
c521ad17 DK	110	bool
	111	DiskdIOStrategy::unlinkdUseful() const
	112	{
	113	return true;
	114	}
	115
b9ae18aa	116	void
	117	DiskdIOStrategy::unlinkFile(char const *path)
	118	{
	119	if (shedLoad()) {
	120	/* Damn, we need to issue a sync unlink here :( */
bf8fe701	121	debugs(79, 2, "storeDiskUnlink: Out of queue space, sync unlink");
b9ae18aa	122	#if USE_UNLINKD
	123
	124	unlinkdUnlink(path);
b9ae18aa	125	#else
	126
	127	unlink(path);
	128	#endif
	129
	130	return;
	131	}
	132
	133	/* We can attempt a diskd unlink */
	134	int x;
	135
ee139403	136	ssize_t shm_offset;
b9ae18aa	137
	138	char *buf;
	139
	140	buf = (char *)shm.get(&shm_offset);
	141
	142	xstrncpy(buf, path, SHMBUF_BLKSZ);
	143
	144	x = send(_MQD_UNLINK,
	145	0,
	146	(StoreIOState::Pointer )NULL,
	147	0,
	148	0,
	149	shm_offset);
	150
	151	if (x < 0) {
e0236918	152	debugs(79, DBG_IMPORTANT, "storeDiskdSend UNLINK: " << xstrerror());
b9ae18aa	153	::unlink(buf); /* XXX EWW! */
	154	// shm.put (shm_offset);
	155	}
	156
cb4185f1	157	++diskd_stats.unlink.ops;
b9ae18aa	158	}
	159
	160	void
	161	DiskdIOStrategy::init()
	162	{
b5d712b5	163	int pid;
b5d712b5	164	void * hIpc;
b9ae18aa	165	int rfd;
	166	int ikey;
	167	const char *args[5];
	168	char skey1[32];
	169	char skey2[32];
	170	char skey3[32];
b7ac5457	171	Ip::Address localhost;
b9ae18aa	172
	173	ikey = (getpid() << 10) + (instanceID << 2);
	174	ikey &= 0x7fffffff;
	175	smsgid = msgget((key_t) ikey, 0700 \| IPC_CREAT);
	176
	177	if (smsgid < 0) {
fa84c01d	178	debugs(50, DBG_CRITICAL, "storeDiskdInit: msgget: " << xstrerror());
b9ae18aa	179	fatal("msgget failed");
	180	}
	181
	182	rmsgid = msgget((key_t) (ikey + 1), 0700 \| IPC_CREAT);
	183
	184	if (rmsgid < 0) {
fa84c01d	185	debugs(50, DBG_CRITICAL, "storeDiskdInit: msgget: " << xstrerror());
b9ae18aa	186	fatal("msgget failed");
	187	}
	188
	189	shm.init(ikey, magic2);
	190	snprintf(skey1, 32, "%d", ikey);
	191	snprintf(skey2, 32, "%d", ikey + 1);
	192	snprintf(skey3, 32, "%d", ikey + 2);
	193	args[0] = "diskd";
	194	args[1] = skey1;
	195	args[2] = skey2;
	196	args[3] = skey3;
	197	args[4] = NULL;
cc192b50	198	localhost.SetLocalhost();
b5d712b5	199	pid = ipcCreate(IPC_STREAM,
	200	Config.Program.diskd,
	201	args,
	202	"diskd",
cc192b50	203	localhost,
b5d712b5	204	&rfd,
	205	&wfd,
	206	&hIpc);
	207
	208	if (pid < 0)
b9ae18aa	209	fatalf("execl: %s", Config.Program.diskd);
	210
	211	if (rfd != wfd)
	212	comm_close(rfd);
	213
	214	fd_note(wfd, "squid -> diskd");
	215
933dd095	216	commUnsetFdTimeout(wfd);
b9ae18aa	217	commSetNonBlocking(wfd);
d841c88d	218	Comm::QuickPollRequired();
b9ae18aa	219	}
	220
	221	/*
	222	* SHM manipulation routines
	223	*/
	224	void
ee139403	225	SharedMemory::put(ssize_t offset)
b9ae18aa	226	{
	227	int i;
	228	assert(offset >= 0);
	229	assert(offset < nbufs * SHMBUF_BLKSZ);
	230	i = offset / SHMBUF_BLKSZ;
	231	assert(i < nbufs);
	232	assert(CBIT_TEST(inuse_map, i));
	233	CBIT_CLR(inuse_map, i);
	234	--diskd_stats.shmbuf_count;
	235	}
	236
	237	void *
	238
ee139403	239	SharedMemory::get(ssize_t * shm_offset)
b9ae18aa	240	{
	241	char *aBuf = NULL;
	242	int i;
	243
cb4185f1	244	for (i = 0; i < nbufs; ++i) {
b9ae18aa	245	if (CBIT_TEST(inuse_map, i))
	246	continue;
	247
	248	CBIT_SET(inuse_map, i);
	249
	250	shm_offset = i SHMBUF_BLKSZ;
	251
	252	aBuf = buf + (*shm_offset);
	253
	254	break;
	255	}
	256
	257	assert(aBuf);
	258	assert(aBuf >= buf);
	259	assert(aBuf < buf + (nbufs * SHMBUF_BLKSZ));
cb4185f1	260	++diskd_stats.shmbuf_count;
b9ae18aa	261
	262	if (diskd_stats.max_shmuse < diskd_stats.shmbuf_count)
	263	diskd_stats.max_shmuse = diskd_stats.shmbuf_count;
	264
	265	return aBuf;
	266	}
	267
	268	void
	269	SharedMemory::init(int ikey, int magic2)
	270	{
	271	nbufs = (int)(magic2 * 1.3);
	272	id = shmget((key_t) (ikey + 2),
	273	nbufs * SHMBUF_BLKSZ, 0600 \| IPC_CREAT);
	274
	275	if (id < 0) {
fa84c01d	276	debugs(50, DBG_CRITICAL, "storeDiskdInit: shmget: " << xstrerror());
b9ae18aa	277	fatal("shmget failed");
	278	}
	279
	280	buf = (char *)shmat(id, NULL, 0);
	281
	282	if (buf == (void *) -1) {
fa84c01d	283	debugs(50, DBG_CRITICAL, "storeDiskdInit: shmat: " << xstrerror());
b9ae18aa	284	fatal("shmat failed");
	285	}
	286
	287	inuse_map = (char *)xcalloc((nbufs + 7) / 8, 1);
	288	diskd_stats.shmbuf_count += nbufs;
	289
cb4185f1	290	for (int i = 0; i < nbufs; ++i) {
b9ae18aa	291	CBIT_SET(inuse_map, i);
	292	put (i * SHMBUF_BLKSZ);
	293	}
	294	}
	295
	296	void
	297	DiskdIOStrategy::unlinkDone(diomsg * M)
	298	{
bf8fe701	299	debugs(79, 3, "storeDiskdUnlinkDone: file " << shm.buf + M->shm_offset << " status " << M->status);
e4f1fdae	300	++statCounter.syscalls.disk.unlinks;
b9ae18aa	301
b9ae18aa	302	if (M->status < 0)
cb4185f1	303	++diskd_stats.unlink.fail;
b9ae18aa	304	else
cb4185f1	305	++diskd_stats.unlink.success;
b9ae18aa	306	}
	307
	308	void
	309	DiskdIOStrategy::handle(diomsg * M)
	310	{
	311	if (!cbdataReferenceValid (M->callback_data)) {
	312	/* I.e. already closed file
	313	* - say when we have a error opening after
	314	* a read was already queued
	315	*/
26ac0430	316	debugs(79, 3, "storeDiskdHandle: Invalid callback_data " << M->callback_data);
b9ae18aa	317	cbdataReferenceDone (M->callback_data);
	318	return;
	319	}
	320
a1ad81aa	321	/* set errno passed from diskd. makes debugging more meaningful */
	322	if (M->status < 0)
	323	errno = -M->status;
	324
b9ae18aa	325	if (M->newstyle) {
	326	DiskdFile theFile = (DiskdFile )M->callback_data;
	327	theFile->RefCountDereference();
	328	theFile->completed (M);
	329	} else
	330	switch (M->mtype) {
	331
	332	case _MQD_OPEN:
	333
	334	case _MQD_CREATE:
	335
	336	case _MQD_CLOSE:
	337
	338	case _MQD_READ:
	339
	340	case _MQD_WRITE:
	341	assert (0);
	342	break;
	343
	344	case _MQD_UNLINK:
	345	unlinkDone(M);
	346	break;
	347
	348	default:
	349	assert(0);
	350	break;
	351	}
	352
	353	cbdataReferenceDone (M->callback_data);
	354	}
	355
	356	int
ee139403	357	DiskdIOStrategy::send(int mtype, int id, DiskdFile theFile, size_t size, off_t offset, ssize_t shm_offset, RefCountable_ requestor)
b9ae18aa	358	{
b9ae18aa	359	diomsg M;
b9ae18aa	360	M.callback_data = cbdataReference(theFile);
	361	theFile->RefCountReference();
	362	M.requestor = requestor;
f30dcf2a	363	M.newstyle = true;
b9ae18aa	364
	365	if (requestor)
	366	requestor->RefCountReference();
	367
f30dcf2a	368	return SEND(&M, mtype, id, size, offset, shm_offset);
b9ae18aa	369	}
	370
	371	int
63be0a78	372	DiskdIOStrategy::send(int mtype, int id, RefCount<StoreIOState> sio, size_t size, off_t offset, ssize_t shm_offset)
b9ae18aa	373	{
b9ae18aa	374	diomsg M;
f30dcf2a	375	M.callback_data = cbdataReference(sio.getRaw());
	376	M.newstyle = false;
	377
	378	return SEND(&M, mtype, id, size, offset, shm_offset);
	379	}
	380
	381	int
ee139403	382	DiskdIOStrategy::SEND(diomsg *M, int mtype, int id, size_t size, off_t offset, ssize_t shm_offset)
f30dcf2a	383	{
b9ae18aa	384	static int send_errors = 0;
	385	static int last_seq_no = 0;
	386	static int seq_no = 0;
f30dcf2a	387	int x;
	388
	389	M->mtype = mtype;
	390	M->size = size;
	391	M->offset = offset;
	392	M->status = -1;
	393	M->shm_offset = (int) shm_offset;
	394	M->id = id;
	395	M->seq_no = ++seq_no;
b9ae18aa	396
f30dcf2a	397	if (M->seq_no < last_seq_no)
e0236918	398	debugs(79, DBG_IMPORTANT, "WARNING: sequencing out of order");
b9ae18aa	399
f30dcf2a	400	x = msgsnd(smsgid, M, diomsg::msg_snd_rcv_sz, IPC_NOWAIT);
b9ae18aa	401
f30dcf2a	402	last_seq_no = M->seq_no;
b9ae18aa	403
b9ae18aa	404	if (0 == x) {
cb4185f1 FC	405	++diskd_stats.sent_count;
cb4185f1 FC	406	++away;
b9ae18aa	407	} else {
e0236918	408	debugs(79, DBG_IMPORTANT, "storeDiskdSend: msgsnd: " << xstrerror());
f30dcf2a	409	cbdataReferenceDone(M->callback_data);
b9ae18aa	410	assert(++send_errors < 100);
6a786390	411	if (shm_offset > -1)
6a786390	412	shm.put(shm_offset);
b9ae18aa	413	}
	414
	415	/*
	416	* We have to drain the queue here if necessary. If we don't,
	417	* then we can have a lot of messages in the queue (probably
	418	* up to 2*magic1) and we can run out of shared memory buffers.
	419	*/
	420	/*
c8f4eac4	421	* Note that we call Store::Root().callbackk (for all SDs), rather
c8f4eac4	422	* than callback for just this SD, so that while
b9ae18aa	423	* we're "blocking" on this SD we can also handle callbacks
	424	* from other SDs that might be ready.
	425	*/
b9ae18aa	426
9a518127	427	struct timeval delay = {0, 1};
b9ae18aa	428
9a518127	429	while (away > magic2) {
b9ae18aa	430	select(0, NULL, NULL, NULL, &delay);
c8f4eac4	431	Store::Root().callback();
b9ae18aa	432
	433	if (delay.tv_usec < 1000000)
	434	delay.tv_usec <<= 1;
	435	}
	436
	437	return x;
	438	}
	439
	440	ConfigOption *
	441	DiskdIOStrategy::getOptionTree() const
	442	{
	443	ConfigOptionVector *result = new ConfigOptionVector;
	444	result->options.push_back(new ConfigOptionAdapter<DiskdIOStrategy>(const_cast<DiskdIOStrategy >(this), &DiskdIOStrategy::optionQ1Parse, &DiskdIOStrategy::optionQ1Dump));
	445	result->options.push_back(new ConfigOptionAdapter<DiskdIOStrategy>(const_cast<DiskdIOStrategy >(this), &DiskdIOStrategy::optionQ2Parse, &DiskdIOStrategy::optionQ2Dump));
	446	return result;
	447	}
	448
	449	bool
350e2aec	450	DiskdIOStrategy::optionQ1Parse(const char name, const char value, int isaReconfig)
b9ae18aa	451	{
	452	if (strcmp(name, "Q1") != 0)
	453	return false;
	454
	455	int old_magic1 = magic1;
	456
	457	magic1 = atoi(value);
	458
350e2aec	459	if (!isaReconfig)
b9ae18aa	460	return true;
	461
	462	if (old_magic1 < magic1) {
	463	/*
	464	* This is because shm.nbufs is computed at startup, when
	465	* we call shmget(). We can't increase the Q1/Q2 parameters
	466	* beyond their initial values because then we might have
	467	* more "Q2 messages" than shared memory chunks, and this
	468	* will cause an assertion in storeDiskdShmGet().
	469	*/
	470	/* TODO: have DiskdIO hold a link to the swapdir, to allow detailed reporting again */
e0236918	471	debugs(3, DBG_IMPORTANT, "WARNING: cannot increase cache_dir Q1 value while Squid is running.");
b9ae18aa	472	magic1 = old_magic1;
	473	return true;
	474	}
	475
	476	if (old_magic1 != magic1)
e0236918	477	debugs(3, DBG_IMPORTANT, "cache_dir new Q1 value '" << magic1 << "'");
b9ae18aa	478
	479	return true;
	480	}
	481
	482	void
	483	DiskdIOStrategy::optionQ1Dump(StoreEntry * e) const
	484	{
	485	storeAppendPrintf(e, " Q1=%d", magic1);
	486	}
	487
	488	bool
350e2aec	489	DiskdIOStrategy::optionQ2Parse(const char name, const char value, int isaReconfig)
b9ae18aa	490	{
	491	if (strcmp(name, "Q2") != 0)
	492	return false;
	493
	494	int old_magic2 = magic2;
	495
	496	magic2 = atoi(value);
	497
350e2aec	498	if (!isaReconfig)
b9ae18aa	499	return true;
	500
	501	if (old_magic2 < magic2) {
	502	/* See comments in Q1 function above */
e0236918	503	debugs(3, DBG_IMPORTANT, "WARNING: cannot increase cache_dir Q2 value while Squid is running.");
b9ae18aa	504	magic2 = old_magic2;
	505	return true;
	506	}
	507
	508	if (old_magic2 != magic2)
e0236918	509	debugs(3, DBG_IMPORTANT, "cache_dir new Q2 value '" << magic2 << "'");
b9ae18aa	510
	511	return true;
	512	}
	513
	514	void
	515	DiskdIOStrategy::optionQ2Dump(StoreEntry * e) const
	516	{
	517	storeAppendPrintf(e, " Q2=%d", magic2);
	518	}
	519
	520	/*
	521	* Sync any pending data. We just sit around and read the queue
	522	* until the data has finished writing.
	523	*/
	524	void
	525	DiskdIOStrategy::sync()
	526	{
	527	static time_t lastmsg = 0;
	528
	529	while (away > 0) {
	530	if (squid_curtime > lastmsg) {
e0236918	531	debugs(47, DBG_IMPORTANT, "storeDiskdDirSync: " << away << " messages away");
b9ae18aa	532	lastmsg = squid_curtime;
	533	}
	534
	535	callback();
	536	}
	537	}
	538
b9ae18aa	539	/*
	540	* Handle callbacks. If we have more than magic2 requests away, we block
	541	* until the queue is below magic2. Otherwise, we simply return when we
	542	* don't get a message.
	543	*/
	544
	545	int
	546	DiskdIOStrategy::callback()
	547	{
	548	diomsg M;
	549	int x;
	550	int retval = 0;
	551
	552	if (away >= magic2) {
cb4185f1	553	++diskd_stats.block_queue_len;
b9ae18aa	554	retval = 1;
	555	/* We might not have anything to do, but our queue
	556	* is full.. */
	557	}
	558
	559	if (diskd_stats.sent_count - diskd_stats.recv_count >
	560	diskd_stats.max_away) {
	561	diskd_stats.max_away = diskd_stats.sent_count - diskd_stats.recv_count;
	562	}
	563
	564	while (1) {
	565	#ifdef ALWAYS_ZERO_BUFFERS
	566	memset(&M, '\0', sizeof(M));
	567	#endif
	568
	569	x = msgrcv(rmsgid, &M, diomsg::msg_snd_rcv_sz, 0, IPC_NOWAIT);
	570
	571	if (x < 0)
	572	break;
	573	else if (x != diomsg::msg_snd_rcv_sz) {
e0236918	574	debugs(47, DBG_IMPORTANT, "storeDiskdDirCallback: msgget returns " << x);
b9ae18aa	575	break;
	576	}
	577
cb4185f1	578	++diskd_stats.recv_count;
b9ae18aa	579	--away;
	580	handle(&M);
	581	retval = 1; /* Return that we've actually done some work */
	582
	583	if (M.shm_offset > -1)
	584	shm.put ((off_t) M.shm_offset);
	585	}
	586
	587	return retval;
	588	}
	589
	590	void
	591	DiskdIOStrategy::statfs(StoreEntry & sentry)const
	592	{
	593	storeAppendPrintf(&sentry, "Pending operations: %d\n", away);
	594	}